/* r600_exa.c revision 2f39173d */
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_vbo.h"
42
/* Compile-time debug switches; set to 1 to enable the corresponding trace. */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/*
 * RADEON_FALLBACK(x): make the calling function return FALSE.
 *
 * x is a parenthesised printf-style argument list, e.g.
 *     RADEON_FALLBACK(("Bad src pitch 0x%08x\n", pitch));
 * When RADEON_TRACE_FALL is non-zero the message is printed through ErrorF,
 * prefixed with the name of the calling function; otherwise x is discarded
 * without being evaluated.
 */
#if RADEON_TRACE_FALL
#define RADEON_FALLBACK(x)              \
do {                                    \
    ErrorF("%s: ", __FUNCTION__);       \
    ErrorF x;                           \
    return FALSE;                       \
} while (0)
#else
#define RADEON_FALLBACK(x) return FALSE
#endif
56
57extern PixmapPtr
58RADEONGetDrawablePixmap(DrawablePtr pDrawable);
59
/* #define SHOW_VERTEXES */
61
/*
 * ROP3 codes, one per X11 raster operation (see RADEON_ROP below).
 * The 8-bit code sits in bits 16-23 of each value; the values are written
 * to CB_COLOR_CONTROL unchanged.
 */
#define RADEON_ROP3_ZERO             0x00000000
#define RADEON_ROP3_DSa              0x00880000
#define RADEON_ROP3_SDna             0x00440000
#define RADEON_ROP3_S                0x00cc0000
#define RADEON_ROP3_DSna             0x00220000
#define RADEON_ROP3_D                0x00aa0000
#define RADEON_ROP3_DSx              0x00660000
#define RADEON_ROP3_DSo              0x00ee0000
#define RADEON_ROP3_DSon             0x00110000
#define RADEON_ROP3_DSxn             0x00990000
#define RADEON_ROP3_Dn               0x00550000
#define RADEON_ROP3_SDno             0x00dd0000
#define RADEON_ROP3_Sn               0x00330000
#define RADEON_ROP3_DSno             0x00bb0000
#define RADEON_ROP3_DSan             0x00770000
#define RADEON_ROP3_ONE              0x00ff0000
78
79uint32_t RADEON_ROP[16] = {
80    RADEON_ROP3_ZERO, /* GXclear        */
81    RADEON_ROP3_DSa,  /* Gxand          */
82    RADEON_ROP3_SDna, /* GXandReverse   */
83    RADEON_ROP3_S,    /* GXcopy         */
84    RADEON_ROP3_DSna, /* GXandInverted  */
85    RADEON_ROP3_D,    /* GXnoop         */
86    RADEON_ROP3_DSx,  /* GXxor          */
87    RADEON_ROP3_DSo,  /* GXor           */
88    RADEON_ROP3_DSon, /* GXnor          */
89    RADEON_ROP3_DSxn, /* GXequiv        */
90    RADEON_ROP3_Dn,   /* GXinvert       */
91    RADEON_ROP3_SDno, /* GXorReverse    */
92    RADEON_ROP3_Sn,   /* GXcopyInverted */
93    RADEON_ROP3_DSno, /* GXorInverted   */
94    RADEON_ROP3_DSan, /* GXnand         */
95    RADEON_ROP3_ONE,  /* GXset          */
96};
97
98static void R600VlineHelperClear(ScrnInfoPtr pScrn)
99{
100    RADEONInfoPtr info = RADEONPTR(pScrn);
101    struct radeon_accel_state *accel_state = info->accel_state;
102
103    accel_state->vline_crtc = NULL;
104    accel_state->vline_y1 = -1;
105    accel_state->vline_y2 = 0;
106}
107
108static void R600VlineHelperSet(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
109{
110    RADEONInfoPtr info = RADEONPTR(pScrn);
111    struct radeon_accel_state *accel_state = info->accel_state;
112
113    accel_state->vline_crtc = radeon_pick_best_crtc(pScrn, x1, x2, y1, y2);
114    if (accel_state->vline_y1 == -1)
115	accel_state->vline_y1 = y1;
116    if (y1 < accel_state->vline_y1)
117	accel_state->vline_y1 = y1;
118    if (y2 > accel_state->vline_y2)
119	accel_state->vline_y2 = y2;
120}
121
/*
 * Check whether a planemask can be honoured for the given depth.
 *
 * A planemask is accepted only when every colour channel of the pixel
 * format is either fully selected or fully deselected:
 *   8 bpp  - one 8-bit channel (bits 0-7)
 *   16 bpp - 5/6/5 bit fields at bits 11, 5 and 0
 *   32 bpp - four 8-bit fields at bits 24, 16, 8 and 0
 * Bits of pm above the pixel size are ignored.
 *
 * Returns TRUE if the planemask is acceptable, FALSE otherwise (including
 * for any other bpp).
 */
static Bool R600ValidPM(uint32_t pm, int bpp)
{
    uint8_t r, g, b, a;

    switch (bpp) {
    case 8:
        a = pm & 0xff;
        if ((a == 0) || (a == 0xff))
            return TRUE;
        break;
    case 16:
        r = (pm >> 11) & 0x1f;
        g = (pm >> 5) & 0x3f;
        b = (pm >> 0) & 0x1f;
        if (((r == 0) || (r == 0x1f)) &&
            ((g == 0) || (g == 0x3f)) &&
            ((b == 0) || (b == 0x1f)))
            return TRUE;
        break;
    case 32:
        a = (pm >> 24) & 0xff;
        r = (pm >> 16) & 0xff;
        g = (pm >> 8) & 0xff;
        b = (pm >> 0) & 0xff;
        if (((a == 0) || (a == 0xff)) &&
            ((r == 0) || (r == 0xff)) &&
            ((g == 0) || (g == 0xff)) &&
            ((b == 0) || (b == 0xff)))
            return TRUE;
        break;
    default:
        break;
    }
    return FALSE;
}
158
/*
 * Return TRUE when bpp is one of the pixel sizes handled by this file
 * (8, 16 or 32 bits per pixel), FALSE otherwise.
 */
static Bool R600CheckBPP(int bpp)
{
    switch (bpp) {
    case 8:
    case 16:
    case 32:
        return TRUE;
    default:
        return FALSE;
    }
}
171
172Bool
173R600SetAccelState(ScrnInfoPtr pScrn,
174		  struct r600_accel_object *src0,
175		  struct r600_accel_object *src1,
176		  struct r600_accel_object *dst,
177		  uint32_t vs_offset, uint32_t ps_offset,
178		  int rop, Pixel planemask)
179{
180    RADEONInfoPtr info = RADEONPTR(pScrn);
181    struct radeon_accel_state *accel_state = info->accel_state;
182    int ret;
183
184    if (src0) {
185	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
186	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
187    } else {
188	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
189	accel_state->src_size[0] = 0;
190    }
191
192    if (src1) {
193	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
194	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
195    } else {
196	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
197	accel_state->src_size[1] = 0;
198    }
199
200    if (dst) {
201	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
202	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
203    } else {
204	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
205	accel_state->dst_size = 0;
206    }
207
208    accel_state->rop = rop;
209    accel_state->planemask = planemask;
210
211    /* bad pitch */
212    if (accel_state->src_obj[0].pitch & 7)
213	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
214
215    /* bad offset */
216    if (accel_state->src_obj[0].offset & 0xff)
217	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
218
219    /* bad pitch */
220    if (accel_state->src_obj[1].pitch & 7)
221	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
222
223    /* bad offset */
224    if (accel_state->src_obj[1].offset & 0xff)
225	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
226
227    if (accel_state->dst_obj.pitch & 7)
228	RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
229
230    if (accel_state->dst_obj.offset & 0xff)
231	RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
232
233    accel_state->vs_size = 512;
234    accel_state->ps_size = 512;
235#if defined(XF86DRM_MODE)
236    if (info->cs) {
237	accel_state->vs_mc_addr = vs_offset;
238	accel_state->ps_mc_addr = ps_offset;
239
240	radeon_cs_space_reset_bos(info->cs);
241	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
242					  RADEON_GEM_DOMAIN_VRAM, 0);
243	if (accel_state->src_obj[0].bo)
244	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
245					      accel_state->src_obj[0].domain, 0);
246	if (accel_state->src_obj[1].bo)
247	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
248					      accel_state->src_obj[1].domain, 0);
249	if (accel_state->dst_obj.bo)
250	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
251					      0, accel_state->dst_obj.domain);
252	ret = radeon_cs_space_check(info->cs);
253	if (ret)
254	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
255
256    } else
257#endif
258    {
259	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
260	    vs_offset;
261	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
262	    ps_offset;
263    }
264
265    return TRUE;
266}
267
#if defined(XF86DRM_MODE)
/*
 * Register the buffer object behind an EXA pixmap with the command stream,
 * using the given read domains and write domain.  The pixmap's driver
 * private is dereferenced without a NULL check.
 */
static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int read_domains, int write_domain)
{
    struct radeon_exa_pixmap_priv *driver_priv = exaGetPixmapDriverPrivate(pPix);

    radeon_cs_space_add_persistent_bo(cs, driver_priv->bo, read_domains, write_domain);
}
#endif
276
277static void
278R600DoneSolid(PixmapPtr pPix);
279
280static void
281R600DoneComposite(PixmapPtr pDst);
282
283
284static Bool
285R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
286{
287    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
288    RADEONInfoPtr info = RADEONPTR(pScrn);
289    struct radeon_accel_state *accel_state = info->accel_state;
290    cb_config_t     cb_conf;
291    shader_config_t vs_conf, ps_conf;
292    int pmask = 0;
293    uint32_t a, r, g, b;
294    float ps_alu_consts[4];
295    struct r600_accel_object dst;
296
297    if (!R600CheckBPP(pPix->drawable.bitsPerPixel))
298	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
299    if (!R600ValidPM(pm, pPix->drawable.bitsPerPixel))
300	RADEON_FALLBACK(("invalid planemask\n"));
301
302#if defined(XF86DRM_MODE)
303    if (info->cs) {
304	dst.offset = 0;
305	dst.bo = radeon_get_pixmap_bo(pPix);
306    } else
307#endif
308    {
309	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
310	dst.bo = NULL;
311    }
312
313    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
314    dst.width = pPix->drawable.width;
315    dst.height = pPix->drawable.height;
316    dst.bpp = pPix->drawable.bitsPerPixel;
317    dst.domain = RADEON_GEM_DOMAIN_VRAM;
318
319    if (!R600SetAccelState(pScrn,
320			   NULL,
321			   NULL,
322			   &dst,
323			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
324			   alu, pm))
325	return FALSE;
326
327    CLEAR (cb_conf);
328    CLEAR (vs_conf);
329    CLEAR (ps_conf);
330
331    radeon_vbo_check(pScrn, 16);
332    r600_cp_start(pScrn);
333
334    set_default_state(pScrn, accel_state->ib);
335
336    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
337    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
338    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
339
340    /* Shader */
341
342    /* flush SQ cache */
343    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
344			accel_state->vs_size, accel_state->vs_mc_addr,
345			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
346
347    vs_conf.shader_addr         = accel_state->vs_mc_addr;
348    vs_conf.num_gprs            = 2;
349    vs_conf.stack_size          = 0;
350    vs_conf.bo                  = accel_state->shaders_bo;
351    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
352
353    /* flush SQ cache */
354    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
355			accel_state->ps_size, accel_state->ps_mc_addr,
356			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
357
358    ps_conf.shader_addr         = accel_state->ps_mc_addr;
359    ps_conf.num_gprs            = 1;
360    ps_conf.stack_size          = 0;
361    ps_conf.uncached_first_inst = 1;
362    ps_conf.clamp_consts        = 0;
363    ps_conf.export_mode         = 2;
364    ps_conf.bo                  = accel_state->shaders_bo;
365    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
366
367    /* Render setup */
368    if (accel_state->planemask & 0x000000ff)
369	pmask |= 4; /* B */
370    if (accel_state->planemask & 0x0000ff00)
371	pmask |= 2; /* G */
372    if (accel_state->planemask & 0x00ff0000)
373	pmask |= 1; /* R */
374    if (accel_state->planemask & 0xff000000)
375	pmask |= 8; /* A */
376    BEGIN_BATCH(6);
377    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
378    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
379    END_BATCH();
380
381    cb_conf.id = 0;
382    cb_conf.w = accel_state->dst_obj.pitch;
383    cb_conf.h = accel_state->dst_obj.height;
384    cb_conf.base = accel_state->dst_obj.offset;
385    cb_conf.bo = accel_state->dst_obj.bo;
386
387    if (accel_state->dst_obj.bpp == 8) {
388	cb_conf.format = COLOR_8;
389	cb_conf.comp_swap = 3; /* A */
390    } else if (accel_state->dst_obj.bpp == 16) {
391	cb_conf.format = COLOR_5_6_5;
392	cb_conf.comp_swap = 2; /* RGB */
393    } else {
394	cb_conf.format = COLOR_8_8_8_8;
395	cb_conf.comp_swap = 1; /* ARGB */
396    }
397    cb_conf.source_format = 1;
398    cb_conf.blend_clamp = 1;
399    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
400
401    /* Interpolator setup */
402    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
403    BEGIN_BATCH(18);
404    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
405    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
406
407    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
408     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
409    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
410    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 (0 << NUM_INTERP_shift));
411    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
412    /* color semantic id 0 -> GPR[0] */
413    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
414								  (0x03 << DEFAULT_VAL_shift)	|
415								  FLAT_SHADE_bit		|
416								  SEL_CENTROID_bit));
417    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                FLAT_SHADE_ENA_bit);
418    END_BATCH();
419
420    /* PS alu constants */
421    if (accel_state->dst_obj.bpp == 16) {
422	r = (fg >> 11) & 0x1f;
423	g = (fg >> 5) & 0x3f;
424	b = (fg >> 0) & 0x1f;
425	ps_alu_consts[0] = (float)r / 31; /* R */
426	ps_alu_consts[1] = (float)g / 63; /* G */
427	ps_alu_consts[2] = (float)b / 31; /* B */
428	ps_alu_consts[3] = 1.0; /* A */
429    } else if (accel_state->dst_obj.bpp == 8) {
430	a = (fg >> 0) & 0xff;
431	ps_alu_consts[0] = 0.0; /* R */
432	ps_alu_consts[1] = 0.0; /* G */
433	ps_alu_consts[2] = 0.0; /* B */
434	ps_alu_consts[3] = (float)a / 255; /* A */
435    } else {
436	a = (fg >> 24) & 0xff;
437	r = (fg >> 16) & 0xff;
438	g = (fg >> 8) & 0xff;
439	b = (fg >> 0) & 0xff;
440	ps_alu_consts[0] = (float)r / 255; /* R */
441	ps_alu_consts[1] = (float)g / 255; /* G */
442	ps_alu_consts[2] = (float)b / 255; /* B */
443	ps_alu_consts[3] = (float)a / 255; /* A */
444    }
445    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
446		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
447
448    if (accel_state->vsync)
449	R600VlineHelperClear(pScrn);
450
451    return TRUE;
452}
453
454
455static void
456R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
457{
458    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
459    RADEONInfoPtr info = RADEONPTR(pScrn);
460    struct radeon_accel_state *accel_state = info->accel_state;
461    float *vb;
462
463    if (accel_state->vsync)
464	R600VlineHelperSet(pScrn, x1, y1, x2, y2);
465
466    vb = radeon_vbo_space(pScrn, 8);
467
468    vb[0] = (float)x1;
469    vb[1] = (float)y1;
470
471    vb[2] = (float)x1;
472    vb[3] = (float)y2;
473
474    vb[4] = (float)x2;
475    vb[5] = (float)y2;
476
477    radeon_vbo_commit(pScrn);
478}
479
480static void
481R600DoneSolid(PixmapPtr pPix)
482{
483    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
484    RADEONInfoPtr info = RADEONPTR(pScrn);
485    struct radeon_accel_state *accel_state = info->accel_state;
486
487    if (accel_state->vsync)
488	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
489			   accel_state->vline_crtc,
490			   accel_state->vline_y1,
491			   accel_state->vline_y2);
492
493    r600_finish_op(pScrn, 8);
494}
495
496static void
497R600DoPrepareCopy(ScrnInfoPtr pScrn)
498{
499    RADEONInfoPtr info = RADEONPTR(pScrn);
500    struct radeon_accel_state *accel_state = info->accel_state;
501    int pmask = 0;
502    cb_config_t     cb_conf;
503    tex_resource_t  tex_res;
504    tex_sampler_t   tex_samp;
505    shader_config_t vs_conf, ps_conf;
506
507    CLEAR (cb_conf);
508    CLEAR (tex_res);
509    CLEAR (tex_samp);
510    CLEAR (vs_conf);
511    CLEAR (ps_conf);
512
513    radeon_vbo_check(pScrn, 16);
514    r600_cp_start(pScrn);
515
516    set_default_state(pScrn, accel_state->ib);
517
518    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
519    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
520    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
521
522    /* Shader */
523
524    /* flush SQ cache */
525    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
526			accel_state->vs_size, accel_state->vs_mc_addr,
527			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
528
529    vs_conf.shader_addr         = accel_state->vs_mc_addr;
530    vs_conf.num_gprs            = 2;
531    vs_conf.stack_size          = 0;
532    vs_conf.bo                  = accel_state->shaders_bo;
533    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
534
535    /* flush SQ cache */
536    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
537			accel_state->ps_size, accel_state->ps_mc_addr,
538			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
539
540    ps_conf.shader_addr         = accel_state->ps_mc_addr;
541    ps_conf.num_gprs            = 1;
542    ps_conf.stack_size          = 0;
543    ps_conf.uncached_first_inst = 1;
544    ps_conf.clamp_consts        = 0;
545    ps_conf.export_mode         = 2;
546    ps_conf.bo                  = accel_state->shaders_bo;
547    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
548
549    /* flush texture cache */
550    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
551			accel_state->src_size[0], accel_state->src_obj[0].offset,
552			accel_state->src_obj[0].bo, accel_state->src_obj[0].domain, 0);
553
554    /* Texture */
555    tex_res.id                  = 0;
556    tex_res.w                   = accel_state->src_obj[0].width;
557    tex_res.h                   = accel_state->src_obj[0].height;
558    tex_res.pitch               = accel_state->src_obj[0].pitch;
559    tex_res.depth               = 0;
560    tex_res.dim                 = SQ_TEX_DIM_2D;
561    tex_res.base                = accel_state->src_obj[0].offset;
562    tex_res.mip_base            = accel_state->src_obj[0].offset;
563    tex_res.bo                  = accel_state->src_obj[0].bo;
564    tex_res.mip_bo              = accel_state->src_obj[0].bo;
565    if (accel_state->src_obj[0].bpp == 8) {
566	tex_res.format              = FMT_8;
567	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
568	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
569	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
570	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
571    } else if (accel_state->src_obj[0].bpp == 16) {
572	tex_res.format              = FMT_5_6_5;
573	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
574	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
575	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
576	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
577    } else {
578	tex_res.format              = FMT_8_8_8_8;
579	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
580	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
581	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
582	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
583    }
584
585    tex_res.request_size        = 1;
586    tex_res.base_level          = 0;
587    tex_res.last_level          = 0;
588    tex_res.perf_modulation     = 0;
589    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
590
591    tex_samp.id                 = 0;
592    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
593    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
594    tex_samp.clamp_z            = SQ_TEX_WRAP;
595    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
596    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
597    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
598    tex_samp.mip_filter         = 0;			/* no mipmap */
599    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
600
601
602    /* Render setup */
603    if (accel_state->planemask & 0x000000ff)
604	pmask |= 4; /* B */
605    if (accel_state->planemask & 0x0000ff00)
606	pmask |= 2; /* G */
607    if (accel_state->planemask & 0x00ff0000)
608	pmask |= 1; /* R */
609    if (accel_state->planemask & 0xff000000)
610	pmask |= 8; /* A */
611    BEGIN_BATCH(6);
612    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
613    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
614    END_BATCH();
615
616    cb_conf.id = 0;
617    cb_conf.w = accel_state->dst_obj.pitch;
618    cb_conf.h = accel_state->dst_obj.height;
619    cb_conf.base = accel_state->dst_obj.offset;
620    cb_conf.bo = accel_state->dst_obj.bo;
621    if (accel_state->dst_obj.bpp == 8) {
622	cb_conf.format = COLOR_8;
623	cb_conf.comp_swap = 3; /* A */
624    } else if (accel_state->dst_obj.bpp == 16) {
625	cb_conf.format = COLOR_5_6_5;
626	cb_conf.comp_swap = 2; /* RGB */
627    } else {
628	cb_conf.format = COLOR_8_8_8_8;
629	cb_conf.comp_swap = 1; /* ARGB */
630    }
631    cb_conf.source_format = 1;
632    cb_conf.blend_clamp = 1;
633    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
634
635    /* Interpolator setup */
636    /* export tex coord from VS */
637    BEGIN_BATCH(18);
638    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
639    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
640
641    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
642     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
643    /* input tex coord from VS */
644    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
645    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
646    /* color semantic id 0 -> GPR[0] */
647    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
648								(0x01 << DEFAULT_VAL_shift)	|
649								SEL_CENTROID_bit));
650    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
651    END_BATCH();
652
653}
654
655static void
656R600DoCopy(ScrnInfoPtr pScrn)
657{
658    r600_finish_op(pScrn, 16);
659}
660
661static void
662R600DoCopyVline(PixmapPtr pPix)
663{
664    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
665    RADEONInfoPtr info = RADEONPTR(pScrn);
666    struct radeon_accel_state *accel_state = info->accel_state;
667
668    if (accel_state->vsync)
669	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
670			   accel_state->vline_crtc,
671			   accel_state->vline_y1,
672			   accel_state->vline_y2);
673
674    r600_finish_op(pScrn, 16);
675}
676
677static void
678R600AppendCopyVertex(ScrnInfoPtr pScrn,
679		     int srcX, int srcY,
680		     int dstX, int dstY,
681		     int w, int h)
682{
683    float *vb;
684
685    vb = radeon_vbo_space(pScrn, 16);
686
687    vb[0] = (float)dstX;
688    vb[1] = (float)dstY;
689    vb[2] = (float)srcX;
690    vb[3] = (float)srcY;
691
692    vb[4] = (float)dstX;
693    vb[5] = (float)(dstY + h);
694    vb[6] = (float)srcX;
695    vb[7] = (float)(srcY + h);
696
697    vb[8] = (float)(dstX + w);
698    vb[9] = (float)(dstY + h);
699    vb[10] = (float)(srcX + w);
700    vb[11] = (float)(srcY + h);
701
702    radeon_vbo_commit(pScrn);
703}
704
705static Bool
706R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
707		int xdir, int ydir,
708		int rop,
709		Pixel planemask)
710{
711    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
712    RADEONInfoPtr info = RADEONPTR(pScrn);
713    struct radeon_accel_state *accel_state = info->accel_state;
714    struct r600_accel_object src_obj, dst_obj;
715
716    if (!R600CheckBPP(pSrc->drawable.bitsPerPixel))
717	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
718    if (!R600CheckBPP(pDst->drawable.bitsPerPixel))
719	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
720    if (!R600ValidPM(planemask, pDst->drawable.bitsPerPixel))
721	RADEON_FALLBACK(("Invalid planemask\n"));
722
723    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
724    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
725
726    accel_state->same_surface = FALSE;
727
728#if defined(XF86DRM_MODE)
729    if (info->cs) {
730	src_obj.offset = 0;
731	dst_obj.offset = 0;
732	src_obj.bo = radeon_get_pixmap_bo(pSrc);
733	dst_obj.bo = radeon_get_pixmap_bo(pDst);
734	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
735	    accel_state->same_surface = TRUE;
736    } else
737#endif
738    {
739	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
740	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
741	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
742	    accel_state->same_surface = TRUE;
743	src_obj.bo = NULL;
744	dst_obj.bo = NULL;
745    }
746
747    src_obj.width = pSrc->drawable.width;
748    src_obj.height = pSrc->drawable.height;
749    src_obj.bpp = pSrc->drawable.bitsPerPixel;
750    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
751
752    dst_obj.width = pDst->drawable.width;
753    dst_obj.height = pDst->drawable.height;
754    dst_obj.bpp = pDst->drawable.bitsPerPixel;
755    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
756
757    if (!R600SetAccelState(pScrn,
758			   &src_obj,
759			   NULL,
760			   &dst_obj,
761			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
762			   rop, planemask))
763	return FALSE;
764
765    if (accel_state->same_surface == TRUE) {
766	unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
767
768#if defined(XF86DRM_MODE)
769	if (info->cs) {
770	    if (accel_state->copy_area_bo) {
771		radeon_bo_unref(accel_state->copy_area_bo);
772		accel_state->copy_area_bo = NULL;
773	    }
774	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
775						       RADEON_GEM_DOMAIN_VRAM,
776						       0);
777	    if (accel_state->copy_area_bo == NULL)
778		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
779
780	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
781					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
782	    if (radeon_cs_space_check(info->cs)) {
783		radeon_bo_unref(accel_state->copy_area_bo);
784		accel_state->copy_area_bo = NULL;
785		return FALSE;
786	    }
787	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
788	} else
789#endif
790	{
791	    if (accel_state->copy_area) {
792		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
793		accel_state->copy_area = NULL;
794	    }
795	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
796	    if (!accel_state->copy_area)
797		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
798	}
799    } else
800	R600DoPrepareCopy(pScrn);
801
802    if (accel_state->vsync)
803	R600VlineHelperClear(pScrn);
804
805    return TRUE;
806}
807
808static void
809R600Copy(PixmapPtr pDst,
810	 int srcX, int srcY,
811	 int dstX, int dstY,
812	 int w, int h)
813{
814    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
815    RADEONInfoPtr info = RADEONPTR(pScrn);
816    struct radeon_accel_state *accel_state = info->accel_state;
817
818    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
819	return;
820
821    if (accel_state->vsync)
822	R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
823
824    if (accel_state->same_surface && accel_state->copy_area) {
825	uint32_t orig_offset, tmp_offset;
826	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
827	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
828	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
829
830#if defined(XF86DRM_MODE)
831	if (info->cs) {
832	    tmp_offset = 0;
833	    orig_offset = 0;
834	} else
835#endif
836	{
837	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
838	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
839	}
840
841	/* src to tmp */
842	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
843	accel_state->dst_obj.bo = accel_state->copy_area_bo;
844	accel_state->dst_obj.offset = tmp_offset;
845	R600DoPrepareCopy(pScrn);
846	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
847	R600DoCopy(pScrn);
848
849	/* tmp to dst */
850	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
851	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
852	accel_state->src_obj[0].offset = tmp_offset;
853	accel_state->dst_obj.domain = orig_dst_domain;
854	accel_state->dst_obj.bo = orig_bo;
855	accel_state->dst_obj.offset = orig_offset;
856	R600DoPrepareCopy(pScrn);
857	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
858	R600DoCopyVline(pDst);
859
860	/* restore state */
861	accel_state->src_obj[0].domain = orig_src_domain;
862	accel_state->src_obj[0].bo = orig_bo;
863	accel_state->src_obj[0].offset = orig_offset;
864    } else
865	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
866
867}
868
869static void
870R600DoneCopy(PixmapPtr pDst)
871{
872    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
873    RADEONInfoPtr info = RADEONPTR(pScrn);
874    struct radeon_accel_state *accel_state = info->accel_state;
875
876    if (!accel_state->same_surface)
877	R600DoCopyVline(pDst);
878
879    if (accel_state->copy_area) {
880	if (!info->cs)
881	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
882	accel_state->copy_area = NULL;
883    }
884
885}
886
887
/* Convert a 16.16 fixed-point value to float (divides by 65536). */
#define xFixedToFloat(f) (((float) (f)) / 65536)
889
/* One entry of the blend-operator table (R600BlendOp). */
struct blendinfo {
    Bool dst_alpha;         /* set for entries whose blend_cntl uses a DST_ALPHA factor */
    Bool src_alpha;         /* set for entries whose blend_cntl uses a SRC_ALPHA factor */
    uint32_t blend_cntl;    /* source and destination blend factors, pre-shifted */
};
895
896static struct blendinfo R600BlendOp[] = {
897    /* Clear */
898    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
899    /* Src */
900    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
901    /* Dst */
902    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
903    /* Over */
904    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
905    /* OverReverse */
906    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
907    /* In */
908    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
909    /* InReverse */
910    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
911    /* Out */
912    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
913    /* OutReverse */
914    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
915    /* Atop */
916    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
917    /* AtopReverse */
918    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
919    /* Xor */
920    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
921    /* Add */
922    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
923};
924
925struct formatinfo {
926    unsigned int fmt;
927    uint32_t card_fmt;
928};
929
930static struct formatinfo R600TexFormats[] = {
931    {PICT_a8r8g8b8,	FMT_8_8_8_8},
932    {PICT_x8r8g8b8,	FMT_8_8_8_8},
933    {PICT_a8b8g8r8,	FMT_8_8_8_8},
934    {PICT_x8b8g8r8,	FMT_8_8_8_8},
935#ifdef PICT_TYPE_BGRA
936    {PICT_b8g8r8a8,	FMT_8_8_8_8},
937    {PICT_b8g8r8x8,	FMT_8_8_8_8},
938#endif
939    {PICT_r5g6b5,	FMT_5_6_5},
940    {PICT_a1r5g5b5,	FMT_1_5_5_5},
941    {PICT_x1r5g5b5,     FMT_1_5_5_5},
942    {PICT_a8,		FMT_8},
943};
944
945static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
946{
947    uint32_t sblend, dblend;
948
949    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
950    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
951
952    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
953     * it as always 1.
954     */
955    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
956	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
957	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
958	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
959	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
960    }
961
962    /* If the source alpha is being used, then we should only be in a case where
963     * the source blend factor is 0, and the source blend value is the mask
964     * channels multiplied by the source picture's alpha.
965     */
966    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
967	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
968	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
969	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
970	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
971	}
972    }
973
974    return sblend | dblend;
975}
976
977static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
978{
979    switch (pDstPicture->format) {
980    case PICT_a8r8g8b8:
981    case PICT_x8r8g8b8:
982    case PICT_a8b8g8r8:
983    case PICT_x8b8g8r8:
984#ifdef PICT_TYPE_BGRA
985    case PICT_b8g8r8a8:
986    case PICT_b8g8r8x8:
987#endif
988	*dst_format = COLOR_8_8_8_8;
989	break;
990    case PICT_r5g6b5:
991	*dst_format = COLOR_5_6_5;
992	break;
993    case PICT_a1r5g5b5:
994    case PICT_x1r5g5b5:
995	*dst_format = COLOR_1_5_5_5;
996	break;
997    case PICT_a8:
998	*dst_format = COLOR_8;
999	break;
1000    default:
1001	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
1002	       (int)pDstPicture->format));
1003    }
1004    return TRUE;
1005}
1006
1007static Bool R600CheckCompositeTexture(PicturePtr pPict,
1008				      PicturePtr pDstPict,
1009				      int op,
1010				      int unit)
1011{
1012    int w = pPict->pDrawable->width;
1013    int h = pPict->pDrawable->height;
1014    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1015    unsigned int i;
1016    int max_tex_w, max_tex_h;
1017
1018    max_tex_w = 8192;
1019    max_tex_h = 8192;
1020
1021    if ((w > max_tex_w) || (h > max_tex_h))
1022	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1023
1024    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1025	if (R600TexFormats[i].fmt == pPict->format)
1026	    break;
1027    }
1028    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
1029	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1030			 (int)pPict->format));
1031
1032    if (pPict->filter != PictFilterNearest &&
1033	pPict->filter != PictFilterBilinear)
1034	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1035
1036    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1037     * picture results in alpha=0 pixels. We can implement this with a border color
1038     * *if* our source texture has an alpha channel, otherwise we need to fall
1039     * back. If we're not transformed then we hope that upper layers have clipped
1040     * rendering to the bounds of the source drawable, in which case it doesn't
1041     * matter. I have not, however, verified that the X server always does such
1042     * clipping.
1043     */
1044    /* FIXME R6xx */
1045    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1046	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1047	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1048    }
1049
1050    return TRUE;
1051}
1052
1053static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1054					int unit)
1055{
1056    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
1057    RADEONInfoPtr info = RADEONPTR(pScrn);
1058    struct radeon_accel_state *accel_state = info->accel_state;
1059    int w = pPict->pDrawable->width;
1060    int h = pPict->pDrawable->height;
1061    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1062    unsigned int i;
1063    tex_resource_t  tex_res;
1064    tex_sampler_t   tex_samp;
1065    int pix_r, pix_g, pix_b, pix_a;
1066    float vs_alu_consts[8];
1067
1068    CLEAR (tex_res);
1069    CLEAR (tex_samp);
1070
1071    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1072	if (R600TexFormats[i].fmt == pPict->format)
1073	    break;
1074    }
1075
1076    /* flush texture cache */
1077    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1078			accel_state->src_size[unit], accel_state->src_obj[unit].offset,
1079			accel_state->src_obj[unit].bo, accel_state->src_obj[unit].domain, 0);
1080
1081    /* Texture */
1082    tex_res.id                  = unit;
1083    tex_res.w                   = w;
1084    tex_res.h                   = h;
1085    tex_res.pitch               = accel_state->src_obj[unit].pitch;
1086    tex_res.depth               = 0;
1087    tex_res.dim                 = SQ_TEX_DIM_2D;
1088    tex_res.base                = accel_state->src_obj[unit].offset;
1089    tex_res.mip_base            = accel_state->src_obj[unit].offset;
1090    tex_res.format              = R600TexFormats[i].card_fmt;
1091    tex_res.bo                  = accel_state->src_obj[unit].bo;
1092    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
1093    tex_res.request_size        = 1;
1094
1095    /* component swizzles */
1096    switch (pPict->format) {
1097    case PICT_a1r5g5b5:
1098    case PICT_a8r8g8b8:
1099	pix_r = SQ_SEL_Z; /* R */
1100	pix_g = SQ_SEL_Y; /* G */
1101	pix_b = SQ_SEL_X; /* B */
1102	pix_a = SQ_SEL_W; /* A */
1103	break;
1104    case PICT_a8b8g8r8:
1105	pix_r = SQ_SEL_X; /* R */
1106	pix_g = SQ_SEL_Y; /* G */
1107	pix_b = SQ_SEL_Z; /* B */
1108	pix_a = SQ_SEL_W; /* A */
1109	break;
1110    case PICT_x8b8g8r8:
1111	pix_r = SQ_SEL_X; /* R */
1112	pix_g = SQ_SEL_Y; /* G */
1113	pix_b = SQ_SEL_Z; /* B */
1114	pix_a = SQ_SEL_1; /* A */
1115	break;
1116#ifdef PICT_TYPE_BGRA
1117    case PICT_b8g8r8a8:
1118	pix_r = SQ_SEL_Y; /* R */
1119	pix_g = SQ_SEL_Z; /* G */
1120	pix_b = SQ_SEL_W; /* B */
1121	pix_a = SQ_SEL_X; /* A */
1122	break;
1123    case PICT_b8g8r8x8:
1124	pix_r = SQ_SEL_Y; /* R */
1125	pix_g = SQ_SEL_Z; /* G */
1126	pix_b = SQ_SEL_W; /* B */
1127	pix_a = SQ_SEL_1; /* A */
1128	break;
1129#endif
1130    case PICT_x1r5g5b5:
1131    case PICT_x8r8g8b8:
1132    case PICT_r5g6b5:
1133	pix_r = SQ_SEL_Z; /* R */
1134	pix_g = SQ_SEL_Y; /* G */
1135	pix_b = SQ_SEL_X; /* B */
1136	pix_a = SQ_SEL_1; /* A */
1137	break;
1138    case PICT_a8:
1139	pix_r = SQ_SEL_0; /* R */
1140	pix_g = SQ_SEL_0; /* G */
1141	pix_b = SQ_SEL_0; /* B */
1142	pix_a = SQ_SEL_X; /* A */
1143	break;
1144    default:
1145	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1146    }
1147
1148    if (unit == 0) {
1149	if (!accel_state->msk_pic) {
1150	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1151		pix_r = SQ_SEL_0;
1152		pix_g = SQ_SEL_0;
1153		pix_b = SQ_SEL_0;
1154	    }
1155
1156	    if (PICT_FORMAT_A(pPict->format) == 0)
1157		pix_a = SQ_SEL_1;
1158	} else {
1159	    if (accel_state->component_alpha) {
1160		if (accel_state->src_alpha) {
1161		    if (PICT_FORMAT_A(pPict->format) == 0) {
1162			pix_r = SQ_SEL_1;
1163			pix_g = SQ_SEL_1;
1164			pix_b = SQ_SEL_1;
1165			pix_a = SQ_SEL_1;
1166		    } else {
1167			pix_r = pix_a;
1168			pix_g = pix_a;
1169			pix_b = pix_a;
1170		    }
1171		} else {
1172		    if (PICT_FORMAT_A(pPict->format) == 0)
1173			pix_a = SQ_SEL_1;
1174		}
1175	    } else {
1176		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1177		    pix_r = SQ_SEL_0;
1178		    pix_g = SQ_SEL_0;
1179		    pix_b = SQ_SEL_0;
1180		}
1181
1182		if (PICT_FORMAT_A(pPict->format) == 0)
1183		    pix_a = SQ_SEL_1;
1184	    }
1185	}
1186    } else {
1187	if (accel_state->component_alpha) {
1188	    if (PICT_FORMAT_A(pPict->format) == 0)
1189		pix_a = SQ_SEL_1;
1190	} else {
1191	    if (PICT_FORMAT_A(pPict->format) == 0) {
1192		pix_r = SQ_SEL_1;
1193		pix_g = SQ_SEL_1;
1194		pix_b = SQ_SEL_1;
1195		pix_a = SQ_SEL_1;
1196	    } else {
1197		pix_r = pix_a;
1198		pix_g = pix_a;
1199		pix_b = pix_a;
1200	    }
1201	}
1202    }
1203
1204    tex_res.dst_sel_x           = pix_r; /* R */
1205    tex_res.dst_sel_y           = pix_g; /* G */
1206    tex_res.dst_sel_z           = pix_b; /* B */
1207    tex_res.dst_sel_w           = pix_a; /* A */
1208
1209    tex_res.base_level          = 0;
1210    tex_res.last_level          = 0;
1211    tex_res.perf_modulation     = 0;
1212    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1213
1214    tex_samp.id                 = unit;
1215    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1216
1217    switch (repeatType) {
1218    case RepeatNormal:
1219	tex_samp.clamp_x            = SQ_TEX_WRAP;
1220	tex_samp.clamp_y            = SQ_TEX_WRAP;
1221	break;
1222    case RepeatPad:
1223	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1224	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1225	break;
1226    case RepeatReflect:
1227	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1228	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1229	break;
1230    case RepeatNone:
1231	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1232	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1233	break;
1234    default:
1235	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1236    }
1237
1238    switch (pPict->filter) {
1239    case PictFilterNearest:
1240	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1241	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1242	break;
1243    case PictFilterBilinear:
1244	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1245	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1246	break;
1247    default:
1248	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1249    }
1250
1251    tex_samp.clamp_z            = SQ_TEX_WRAP;
1252    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1253    tex_samp.mip_filter         = 0;			/* no mipmap */
1254    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
1255
1256    if (pPict->transform != 0) {
1257	accel_state->is_transform[unit] = TRUE;
1258	accel_state->transform[unit] = pPict->transform;
1259
1260	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1261	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1262	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1263	vs_alu_consts[3] = 1.0 / w;
1264
1265	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1266	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1267	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1268	vs_alu_consts[7] = 1.0 / h;
1269    } else {
1270	accel_state->is_transform[unit] = FALSE;
1271
1272	vs_alu_consts[0] = 1.0;
1273	vs_alu_consts[1] = 0.0;
1274	vs_alu_consts[2] = 0.0;
1275	vs_alu_consts[3] = 1.0 / w;
1276
1277	vs_alu_consts[4] = 0.0;
1278	vs_alu_consts[5] = 1.0;
1279	vs_alu_consts[6] = 0.0;
1280	vs_alu_consts[7] = 1.0 / h;
1281    }
1282
1283    /* VS alu constants */
1284    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1285		   sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1286
1287    return TRUE;
1288}
1289
1290static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1291			       PicturePtr pDstPicture)
1292{
1293    uint32_t tmp1;
1294    PixmapPtr pSrcPixmap, pDstPixmap;
1295    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1296
1297    /* Check for unsupported compositing operations. */
1298    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1299	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1300
1301    if (!pSrcPicture->pDrawable)
1302	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1303
1304    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1305
1306    max_tex_w = 8192;
1307    max_tex_h = 8192;
1308    max_dst_w = 8192;
1309    max_dst_h = 8192;
1310
1311    if (pSrcPixmap->drawable.width >= max_tex_w ||
1312	pSrcPixmap->drawable.height >= max_tex_h) {
1313	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1314			 pSrcPixmap->drawable.width,
1315			 pSrcPixmap->drawable.height));
1316    }
1317
1318    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1319
1320    if (pDstPixmap->drawable.width >= max_dst_w ||
1321	pDstPixmap->drawable.height >= max_dst_h) {
1322	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1323			 pDstPixmap->drawable.width,
1324			 pDstPixmap->drawable.height));
1325    }
1326
1327    if (pMaskPicture) {
1328	PixmapPtr pMaskPixmap;
1329
1330	if (!pMaskPicture->pDrawable)
1331	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1332
1333	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1334
1335	if (pMaskPixmap->drawable.width >= max_tex_w ||
1336	    pMaskPixmap->drawable.height >= max_tex_h) {
1337	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1338			     pMaskPixmap->drawable.width,
1339			     pMaskPixmap->drawable.height));
1340	}
1341
1342	if (pMaskPicture->componentAlpha) {
1343	    /* Check if it's component alpha that relies on a source alpha and
1344	     * on the source value.  We can only get one of those into the
1345	     * single source value that we get to blend with.
1346	     */
1347	    if (R600BlendOp[op].src_alpha &&
1348		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1349		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1350		RADEON_FALLBACK(("Component alpha not supported with source "
1351				 "alpha and source value blending.\n"));
1352	    }
1353	}
1354
1355	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1356	    return FALSE;
1357    }
1358
1359    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1360	return FALSE;
1361
1362    if (!R600GetDestFormat(pDstPicture, &tmp1))
1363	return FALSE;
1364
1365    return TRUE;
1366
1367}
1368
1369static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1370				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1371				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1372{
1373    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1374    RADEONInfoPtr info = RADEONPTR(pScrn);
1375    struct radeon_accel_state *accel_state = info->accel_state;
1376    uint32_t blendcntl, dst_format;
1377    cb_config_t cb_conf;
1378    shader_config_t vs_conf, ps_conf;
1379    struct r600_accel_object src_obj, mask_obj, dst_obj;
1380
1381    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1382	return FALSE;
1383
1384#if defined(XF86DRM_MODE)
1385    if (info->cs) {
1386	src_obj.offset = 0;
1387	dst_obj.offset = 0;
1388	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1389	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1390    } else
1391#endif
1392    {
1393	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1394	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1395	src_obj.bo = NULL;
1396	dst_obj.bo = NULL;
1397    }
1398    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1399    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1400
1401    src_obj.width = pSrc->drawable.width;
1402    src_obj.height = pSrc->drawable.height;
1403    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1404    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1405
1406    dst_obj.width = pDst->drawable.width;
1407    dst_obj.height = pDst->drawable.height;
1408    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1409    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1410
1411    if (pMask) {
1412#if defined(XF86DRM_MODE)
1413	if (info->cs) {
1414	    mask_obj.offset = 0;
1415	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1416	} else
1417#endif
1418	{
1419	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1420	    mask_obj.bo = NULL;
1421	}
1422	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1423
1424	mask_obj.width = pMask->drawable.width;
1425	mask_obj.height = pMask->drawable.height;
1426	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1427	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1428
1429	if (!R600SetAccelState(pScrn,
1430			       &src_obj,
1431			       &mask_obj,
1432			       &dst_obj,
1433			       accel_state->comp_vs_offset, accel_state->comp_mask_ps_offset,
1434			       3, 0xffffffff))
1435	    return FALSE;
1436
1437	accel_state->msk_pic = pMaskPicture;
1438	if (pMaskPicture->componentAlpha) {
1439	    accel_state->component_alpha = TRUE;
1440	    if (R600BlendOp[op].src_alpha)
1441		accel_state->src_alpha = TRUE;
1442	    else
1443		accel_state->src_alpha = FALSE;
1444	} else {
1445	    accel_state->component_alpha = FALSE;
1446	    accel_state->src_alpha = FALSE;
1447	}
1448    } else {
1449	if (!R600SetAccelState(pScrn,
1450			       &src_obj,
1451			       NULL,
1452			       &dst_obj,
1453			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1454			       3, 0xffffffff))
1455	    return FALSE;
1456
1457	accel_state->msk_pic = NULL;
1458	accel_state->component_alpha = FALSE;
1459	accel_state->src_alpha = FALSE;
1460    }
1461
1462    if (!R600GetDestFormat(pDstPicture, &dst_format))
1463	return FALSE;
1464
1465    CLEAR (cb_conf);
1466    CLEAR (vs_conf);
1467    CLEAR (ps_conf);
1468
1469    if (pMask)
1470        radeon_vbo_check(pScrn, 24);
1471    else
1472        radeon_vbo_check(pScrn, 16);
1473
1474    r600_cp_start(pScrn);
1475
1476    set_default_state(pScrn, accel_state->ib);
1477
1478    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1479    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1480    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1481
1482    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1483        R600IBDiscard(pScrn, accel_state->ib);
1484        r600_vb_discard(pScrn);
1485        return FALSE;
1486    }
1487
1488    if (pMask) {
1489        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1490            R600IBDiscard(pScrn, accel_state->ib);
1491            r600_vb_discard(pScrn);
1492            return FALSE;
1493        }
1494    } else
1495        accel_state->is_transform[1] = FALSE;
1496
1497    if (pMask)
1498	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1499    else
1500	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1501
1502    /* Shader */
1503
1504    /* flush SQ cache */
1505    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1506			accel_state->vs_size, accel_state->vs_mc_addr,
1507			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1508
1509    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1510    vs_conf.num_gprs            = 3;
1511    vs_conf.stack_size          = 1;
1512    vs_conf.bo                  = accel_state->shaders_bo;
1513    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1514
1515    /* flush SQ cache */
1516    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1517			accel_state->ps_size, accel_state->ps_mc_addr,
1518			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1519
1520    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1521    ps_conf.num_gprs            = 3;
1522    ps_conf.stack_size          = 0;
1523    ps_conf.uncached_first_inst = 1;
1524    ps_conf.clamp_consts        = 0;
1525    ps_conf.export_mode         = 2;
1526    ps_conf.bo                  = accel_state->shaders_bo;
1527    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1528
1529    BEGIN_BATCH(9);
1530    EREG(accel_state->ib, CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
1531
1532    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1533
1534    if (info->ChipFamily == CHIP_FAMILY_R600) {
1535	/* no per-MRT blend on R600 */
1536	EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
1537	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
1538    } else {
1539	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (RADEON_ROP[3] |
1540								    (1 << TARGET_BLEND_ENABLE_shift) |
1541								    PER_MRT_BLEND_bit));
1542	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
1543    }
1544    END_BATCH();
1545
1546    cb_conf.id = 0;
1547    cb_conf.w = accel_state->dst_obj.pitch;
1548    cb_conf.h = accel_state->dst_obj.height;
1549    cb_conf.base = accel_state->dst_obj.offset;
1550    cb_conf.format = dst_format;
1551    cb_conf.bo = accel_state->dst_obj.bo;
1552
1553    switch (pDstPicture->format) {
1554    case PICT_a8r8g8b8:
1555    case PICT_x8r8g8b8:
1556    case PICT_a1r5g5b5:
1557    case PICT_x1r5g5b5:
1558    default:
1559	cb_conf.comp_swap = 1; /* ARGB */
1560	break;
1561    case PICT_a8b8g8r8:
1562    case PICT_x8b8g8r8:
1563	cb_conf.comp_swap = 0; /* ABGR */
1564	break;
1565#ifdef PICT_TYPE_BGRA
1566    case PICT_b8g8r8a8:
1567    case PICT_b8g8r8x8:
1568	cb_conf.comp_swap = 3; /* BGRA */
1569	break;
1570#endif
1571    case PICT_r5g6b5:
1572	cb_conf.comp_swap = 2; /* RGB */
1573	break;
1574    case PICT_a8:
1575	cb_conf.comp_swap = 3; /* A */
1576	break;
1577    }
1578    cb_conf.source_format = 1;
1579    cb_conf.blend_clamp = 1;
1580    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1581
1582    /* Interpolator setup */
1583    BEGIN_BATCH(21);
1584    if (pMask) {
1585	/* export 2 tex coords from VS */
1586	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1587	/* src = semantic id 0; mask = semantic id 1 */
1588	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1589						  (1 << SEMANTIC_1_shift)));
1590	/* input 2 tex coords from VS */
1591	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
1592    } else {
1593	/* export 1 tex coords from VS */
1594	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1595	/* src = semantic id 0 */
1596	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
1597	/* input 1 tex coords from VS */
1598	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
1599    }
1600    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
1601    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1602    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
1603								(0x01 << DEFAULT_VAL_shift)	|
1604								SEL_CENTROID_bit));
1605    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1606    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2),       ((1    << SEMANTIC_shift)	|
1607								(0x01 << DEFAULT_VAL_shift)	|
1608								SEL_CENTROID_bit));
1609    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
1610    END_BATCH();
1611
1612    if (accel_state->vsync)
1613	R600VlineHelperClear(pScrn);
1614
1615    return TRUE;
1616}
1617
1618static void R600Composite(PixmapPtr pDst,
1619			  int srcX, int srcY,
1620			  int maskX, int maskY,
1621			  int dstX, int dstY,
1622			  int w, int h)
1623{
1624    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1625    RADEONInfoPtr info = RADEONPTR(pScrn);
1626    struct radeon_accel_state *accel_state = info->accel_state;
1627    float *vb;
1628
1629    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1630       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1631
1632    if (accel_state->vsync)
1633	R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1634
1635    if (accel_state->msk_pic) {
1636
1637	vb = radeon_vbo_space(pScrn, 24);
1638
1639	vb[0] = (float)dstX;
1640	vb[1] = (float)dstY;
1641	vb[2] = (float)srcX;
1642	vb[3] = (float)srcY;
1643	vb[4] = (float)maskX;
1644	vb[5] = (float)maskY;
1645
1646	vb[6] = (float)dstX;
1647	vb[7] = (float)(dstY + h);
1648	vb[8] = (float)srcX;
1649	vb[9] = (float)(srcY + h);
1650	vb[10] = (float)maskX;
1651	vb[11] = (float)(maskY + h);
1652
1653	vb[12] = (float)(dstX + w);
1654	vb[13] = (float)(dstY + h);
1655	vb[14] = (float)(srcX + w);
1656	vb[15] = (float)(srcY + h);
1657	vb[16] = (float)(maskX + w);
1658	vb[17] = (float)(maskY + h);
1659
1660	radeon_vbo_commit(pScrn);
1661
1662    } else {
1663
1664	vb = radeon_vbo_space(pScrn, 16);
1665
1666	vb[0] = (float)dstX;
1667	vb[1] = (float)dstY;
1668	vb[2] = (float)srcX;
1669	vb[3] = (float)srcY;
1670
1671	vb[4] = (float)dstX;
1672	vb[5] = (float)(dstY + h);
1673	vb[6] = (float)srcX;
1674	vb[7] = (float)(srcY + h);
1675
1676	vb[8] = (float)(dstX + w);
1677	vb[9] = (float)(dstY + h);
1678	vb[10] = (float)(srcX + w);
1679	vb[11] = (float)(srcY + h);
1680
1681	radeon_vbo_commit(pScrn);
1682    }
1683
1684
1685}
1686
1687static void R600DoneComposite(PixmapPtr pDst)
1688{
1689    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1690    RADEONInfoPtr info = RADEONPTR(pScrn);
1691    struct radeon_accel_state *accel_state = info->accel_state;
1692    int vtx_size;
1693
1694    if (accel_state->vsync)
1695	cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1696			   accel_state->vline_crtc,
1697			   accel_state->vline_y1,
1698			   accel_state->vline_y2);
1699
1700    vtx_size = accel_state->msk_pic ? 24 : 16;
1701
1702    r600_finish_op(pScrn, vtx_size);
1703}
1704
1705Bool
1706R600CopyToVRAM(ScrnInfoPtr pScrn,
1707	       char *src, int src_pitch,
1708	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1709	       int x, int y, int w, int h)
1710{
1711    RADEONInfoPtr info = RADEONPTR(pScrn);
1712    struct radeon_accel_state *accel_state = info->accel_state;
1713    uint32_t scratch_mc_addr;
1714    int wpass = w * (bpp/8);
1715    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1716    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1717    int scratch_offset = 0, hpass, temph;
1718    char *dst;
1719    drmBufPtr scratch;
1720    struct r600_accel_object scratch_obj, dst_obj;
1721
1722    if (dst_pitch & 7)
1723	return FALSE;
1724
1725    if (dst_mc_addr & 0xff)
1726	return FALSE;
1727
1728    scratch = RADEONCPGetBuffer(pScrn);
1729    if (scratch == NULL)
1730	return FALSE;
1731
1732    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1733    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1734    dst = (char *)scratch->address;
1735
1736    scratch_obj.pitch = scratch_pitch;
1737    scratch_obj.width = w;
1738    scratch_obj.height = hpass;
1739    scratch_obj.offset = scratch_mc_addr;
1740    scratch_obj.bpp = bpp;
1741    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1742    scratch_obj.bo = NULL;
1743
1744    dst_obj.pitch = dst_pitch;
1745    dst_obj.width = dst_width;
1746    dst_obj.height = dst_height;
1747    dst_obj.offset = dst_mc_addr;
1748    dst_obj.bo = NULL;
1749    dst_obj.bpp = bpp;
1750    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1751
1752    if (!R600SetAccelState(pScrn,
1753			   &scratch_obj,
1754			   NULL,
1755			   &dst_obj,
1756			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1757			   3, 0xffffffff))
1758	return FALSE;
1759
1760    /* memcopy from sys to scratch */
1761    while (temph--) {
1762	memcpy (dst, src, wpass);
1763	src += src_pitch;
1764	dst += scratch_pitch_bytes;
1765    }
1766
1767    while (h) {
1768	uint32_t offset = scratch_mc_addr + scratch_offset;
1769	int oldhpass = hpass;
1770	h -= oldhpass;
1771	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1772
1773	if (hpass) {
1774	    scratch_offset = scratch->total/2 - scratch_offset;
1775	    dst = (char *)scratch->address + scratch_offset;
1776	    /* wait for the engine to be idle */
1777	    RADEONWaitForIdleCP(pScrn);
1778	    //memcopy from sys to scratch
1779	    while (temph--) {
1780		memcpy (dst, src, wpass);
1781		src += src_pitch;
1782		dst += scratch_pitch_bytes;
1783	    }
1784	}
1785	/* blit from scratch to vram */
1786	info->accel_state->src_obj[0].height = oldhpass;
1787	info->accel_state->src_obj[0].offset = offset;
1788	R600DoPrepareCopy(pScrn);
1789	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1790	R600DoCopy(pScrn);
1791	y += oldhpass;
1792    }
1793
1794    R600IBDiscard(pScrn, scratch);
1795    r600_vb_discard(pScrn);
1796
1797    return TRUE;
1798}
1799
1800static Bool
1801R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1802		   char *src, int src_pitch)
1803{
1804    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1805    RADEONInfoPtr info = RADEONPTR(pScrn);
1806    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1807    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1808    int bpp = pDst->drawable.bitsPerPixel;
1809
1810    return R600CopyToVRAM(pScrn,
1811			  src, src_pitch,
1812			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1813			  x, y, w, h);
1814}
1815
1816static Bool
1817R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1818		       char *dst, int dst_pitch)
1819{
1820    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1821    RADEONInfoPtr info = RADEONPTR(pScrn);
1822    struct radeon_accel_state *accel_state = info->accel_state;
1823    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1824    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1825    uint32_t src_width = pSrc->drawable.width;
1826    uint32_t src_height = pSrc->drawable.height;
1827    int bpp = pSrc->drawable.bitsPerPixel;
1828    uint32_t scratch_mc_addr;
1829    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1830    int scratch_offset = 0, hpass;
1831    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1832    int wpass = w * (bpp/8);
1833    drmBufPtr scratch;
1834    struct r600_accel_object scratch_obj, src_obj;
1835
1836    /* bad pipe setup in drm prior to 1.32 */
1837    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1838	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1839		    return FALSE;
1840    }
1841
1842    if (src_pitch & 7)
1843	return FALSE;
1844
1845    scratch = RADEONCPGetBuffer(pScrn);
1846    if (scratch == NULL)
1847	return FALSE;
1848
1849    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1850    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1851
1852    src_obj.pitch = src_pitch;
1853    src_obj.width = src_width;
1854    src_obj.height = src_height;
1855    src_obj.offset = src_mc_addr;
1856    src_obj.bo = NULL;
1857    src_obj.bpp = bpp;
1858    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1859
1860    scratch_obj.pitch = scratch_pitch;
1861    scratch_obj.width = src_width;
1862    scratch_obj.height = hpass;
1863    scratch_obj.offset = scratch_mc_addr;
1864    scratch_obj.bpp = bpp;
1865    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1866    scratch_obj.bo = NULL;
1867
1868    if (!R600SetAccelState(pScrn,
1869			   &src_obj,
1870			   NULL,
1871			   &scratch_obj,
1872			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1873			   3, 0xffffffff))
1874	return FALSE;
1875
1876    /* blit from vram to scratch */
1877    R600DoPrepareCopy(pScrn);
1878    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1879    R600DoCopy(pScrn);
1880
1881    while (h) {
1882	char *src = (char *)scratch->address + scratch_offset;
1883	int oldhpass = hpass;
1884	h -= oldhpass;
1885	y += oldhpass;
1886	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1887
1888	if (hpass) {
1889	    scratch_offset = scratch->total/2 - scratch_offset;
1890	    /* blit from vram to scratch */
1891	    info->accel_state->dst_obj.height = hpass;
1892	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1893	    R600DoPrepareCopy(pScrn);
1894	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1895	    R600DoCopy(pScrn);
1896	}
1897
1898	/* wait for the engine to be idle */
1899	RADEONWaitForIdleCP(pScrn);
1900	/* memcopy from scratch to sys */
1901	while (oldhpass--) {
1902	    memcpy (dst, src, wpass);
1903	    dst += dst_pitch;
1904	    src += scratch_pitch_bytes;
1905	}
1906    }
1907
1908    R600IBDiscard(pScrn, scratch);
1909    r600_vb_discard(pScrn);
1910
1911    return TRUE;
1912
1913}
1914
1915#if defined(XF86DRM_MODE)
1916
1917static Bool
1918R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1919		     char *src, int src_pitch)
1920{
1921    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1922    RADEONInfoPtr info = RADEONPTR(pScrn);
1923    struct radeon_accel_state *accel_state = info->accel_state;
1924    struct radeon_exa_pixmap_priv *driver_priv;
1925    struct radeon_bo *scratch;
1926    unsigned size;
1927    uint32_t dst_domain;
1928    int bpp = pDst->drawable.bitsPerPixel;
1929    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
1930    uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
1931    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1932    Bool r;
1933    int i;
1934    struct r600_accel_object src_obj, dst_obj;
1935
1936    if (bpp < 8)
1937	return FALSE;
1938
1939    driver_priv = exaGetPixmapDriverPrivate(pDst);
1940
1941    /* If we know the BO won't be busy, don't bother */
1942    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
1943	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1944	return FALSE;
1945
1946    size = scratch_pitch * h;
1947    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1948    if (scratch == NULL) {
1949	return FALSE;
1950    }
1951
1952    src_obj.pitch = src_pitch_hw;
1953    src_obj.width = w;
1954    src_obj.height = h;
1955    src_obj.offset = 0;
1956    src_obj.bpp = bpp;
1957    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1958    src_obj.bo = scratch;
1959
1960    dst_obj.pitch = dst_pitch_hw;
1961    dst_obj.width = pDst->drawable.width;
1962    dst_obj.height = pDst->drawable.height;
1963    dst_obj.offset = 0;
1964    dst_obj.bpp = bpp;
1965    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1966    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1967
1968    if (!R600SetAccelState(pScrn,
1969			   &src_obj,
1970			   NULL,
1971			   &dst_obj,
1972			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1973			   3, 0xffffffff)) {
1974        r = FALSE;
1975        goto out;
1976    }
1977
1978    r = radeon_bo_map(scratch, 0);
1979    if (r) {
1980        r = FALSE;
1981        goto out;
1982    }
1983    r = TRUE;
1984    size = w * bpp / 8;
1985    for (i = 0; i < h; i++) {
1986        memcpy(scratch->ptr + i * scratch_pitch, src, size);
1987        src += src_pitch;
1988    }
1989    radeon_bo_unmap(scratch);
1990
1991    if (info->accel_state->vsync)
1992	R600VlineHelperSet(pScrn, x, y, x + w, y + h);
1993
1994    /* blit from gart to vram */
1995    R600DoPrepareCopy(pScrn);
1996    R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1997    R600DoCopyVline(pDst);
1998
1999out:
2000    radeon_bo_unref(scratch);
2001    return r;
2002}
2003
2004static Bool
2005R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
2006			 int h, char *dst, int dst_pitch)
2007{
2008    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
2009    RADEONInfoPtr info = RADEONPTR(pScrn);
2010    struct radeon_accel_state *accel_state = info->accel_state;
2011    struct radeon_exa_pixmap_priv *driver_priv;
2012    struct radeon_bo *scratch;
2013    unsigned size;
2014    uint32_t src_domain = 0;
2015    int bpp = pSrc->drawable.bitsPerPixel;
2016    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
2017    uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
2018    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
2019    Bool r;
2020    struct r600_accel_object src_obj, dst_obj;
2021
2022    if (bpp < 8)
2023	return FALSE;
2024
2025    driver_priv = exaGetPixmapDriverPrivate(pSrc);
2026
2027    /* If we know the BO won't end up in VRAM anyway, don't bother */
2028    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
2029	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2030	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2031	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2032	    src_domain = 0;
2033    }
2034
2035    if (!src_domain)
2036	radeon_bo_is_busy(driver_priv->bo, &src_domain);
2037
2038    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
2039	return FALSE;
2040
2041    size = scratch_pitch * h;
2042    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
2043    if (scratch == NULL) {
2044	return FALSE;
2045    }
2046    radeon_cs_space_reset_bos(info->cs);
2047    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2048				      RADEON_GEM_DOMAIN_VRAM, 0);
2049    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2050    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2051    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2052    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2053    r = radeon_cs_space_check(info->cs);
2054    if (r) {
2055        r = FALSE;
2056        goto out;
2057    }
2058
2059    src_obj.pitch = src_pitch_hw;
2060    src_obj.width = pSrc->drawable.width;
2061    src_obj.height = pSrc->drawable.height;
2062    src_obj.offset = 0;
2063    src_obj.bpp = bpp;
2064    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2065    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2066
2067    dst_obj.pitch = dst_pitch_hw;
2068    dst_obj.width = w;
2069    dst_obj.height = h;
2070    dst_obj.offset = 0;
2071    dst_obj.bo = scratch;
2072    dst_obj.bpp = bpp;
2073    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2074
2075    if (!R600SetAccelState(pScrn,
2076			   &src_obj,
2077			   NULL,
2078			   &dst_obj,
2079			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2080			   3, 0xffffffff)) {
2081        r = FALSE;
2082        goto out;
2083    }
2084
2085    /* blit from vram to gart */
2086    R600DoPrepareCopy(pScrn);
2087    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2088    R600DoCopy(pScrn);
2089
2090    if (info->cs)
2091	radeon_cs_flush_indirect(pScrn);
2092
2093    r = radeon_bo_map(scratch, 0);
2094    if (r) {
2095        r = FALSE;
2096        goto out;
2097    }
2098    r = TRUE;
2099    w *= bpp / 8;
2100    size = 0;
2101    while (h--) {
2102        memcpy(dst, scratch->ptr + size, w);
2103        size += scratch_pitch;
2104        dst += dst_pitch;
2105    }
2106    radeon_bo_unmap(scratch);
2107out:
2108    radeon_bo_unref(scratch);
2109    return r;
2110}
2111#endif
2112
2113static int
2114R600MarkSync(ScreenPtr pScreen)
2115{
2116    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2117    RADEONInfoPtr info = RADEONPTR(pScrn);
2118    struct radeon_accel_state *accel_state = info->accel_state;
2119
2120    return ++accel_state->exaSyncMarker;
2121
2122}
2123
2124static void
2125R600Sync(ScreenPtr pScreen, int marker)
2126{
2127    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2128    RADEONInfoPtr info = RADEONPTR(pScrn);
2129    struct radeon_accel_state *accel_state = info->accel_state;
2130
2131    if (accel_state->exaMarkerSynced != marker) {
2132#ifdef XF86DRM_MODE
2133#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2134	if (!info->cs)
2135#endif
2136#endif
2137	    RADEONWaitForIdleCP(pScrn);
2138	accel_state->exaMarkerSynced = marker;
2139    }
2140
2141}
2142
2143static Bool
2144R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2145{
2146    RADEONInfoPtr info = RADEONPTR(pScrn);
2147    struct radeon_accel_state *accel_state = info->accel_state;
2148
2149    /* 512 bytes per shader for now */
2150    int size = 512 * 9;
2151
2152    accel_state->shaders = NULL;
2153
2154#ifdef XF86DRM_MODE
2155#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2156    if (info->cs) {
2157	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2158						 RADEON_GEM_DOMAIN_VRAM, 0);
2159	if (accel_state->shaders_bo == NULL) {
2160	    ErrorF("Allocating shader failed\n");
2161	    return FALSE;
2162	}
2163	return TRUE;
2164    } else
2165#endif
2166#endif
2167    {
2168	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2169						 TRUE, NULL, NULL);
2170
2171	if (accel_state->shaders == NULL)
2172	    return FALSE;
2173    }
2174
2175    return TRUE;
2176}
2177
2178Bool
2179R600LoadShaders(ScrnInfoPtr pScrn)
2180{
2181    RADEONInfoPtr info = RADEONPTR(pScrn);
2182    struct radeon_accel_state *accel_state = info->accel_state;
2183    RADEONChipFamily ChipSet = info->ChipFamily;
2184    uint32_t *shader;
2185#ifdef XF86DRM_MODE
2186#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2187    int ret;
2188
2189    if (info->cs) {
2190	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2191	if (ret) {
2192	    FatalError("failed to map shader %d\n", ret);
2193	    return FALSE;
2194	}
2195	shader = accel_state->shaders_bo->ptr;
2196    } else
2197#endif
2198#endif
2199	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2200
2201    /*  solid vs --------------------------------------- */
2202    accel_state->solid_vs_offset = 0;
2203    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2204
2205    /*  solid ps --------------------------------------- */
2206    accel_state->solid_ps_offset = 512;
2207    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2208
2209    /*  copy vs --------------------------------------- */
2210    accel_state->copy_vs_offset = 1024;
2211    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2212
2213    /*  copy ps --------------------------------------- */
2214    accel_state->copy_ps_offset = 1536;
2215    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2216
2217    /*  comp vs --------------------------------------- */
2218    accel_state->comp_vs_offset = 2048;
2219    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2220
2221    /*  comp ps --------------------------------------- */
2222    accel_state->comp_ps_offset = 2560;
2223    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2224
2225    /*  comp mask ps --------------------------------------- */
2226    accel_state->comp_mask_ps_offset = 3072;
2227    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
2228
2229    /*  xv vs --------------------------------------- */
2230    accel_state->xv_vs_offset = 3584;
2231    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2232
2233    /*  xv ps --------------------------------------- */
2234    accel_state->xv_ps_offset = 4096;
2235    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2236
2237#ifdef XF86DRM_MODE
2238#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2239    if (info->cs) {
2240	radeon_bo_unmap(accel_state->shaders_bo);
2241    }
2242#endif
2243#endif
2244
2245    return TRUE;
2246}
2247
2248static Bool
2249R600PrepareAccess(PixmapPtr pPix, int index)
2250{
2251    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2252    RADEONInfoPtr info = RADEONPTR(pScrn);
2253    unsigned char *RADEONMMIO = info->MMIO;
2254
2255    /* flush HDP read/write caches */
2256    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2257
2258    return TRUE;
2259}
2260
2261static void
2262R600FinishAccess(PixmapPtr pPix, int index)
2263{
2264    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2265    RADEONInfoPtr info = RADEONPTR(pScrn);
2266    unsigned char *RADEONMMIO = info->MMIO;
2267
2268    /* flush HDP read/write caches */
2269    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2270
2271}
2272
2273Bool
2274R600DrawInit(ScreenPtr pScreen)
2275{
2276    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2277    RADEONInfoPtr info   = RADEONPTR(pScrn);
2278
2279    if (info->accel_state->exa == NULL) {
2280	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2281	return FALSE;
2282    }
2283
2284    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2285    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2286
2287    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2288    info->accel_state->exa->Solid = R600Solid;
2289    info->accel_state->exa->DoneSolid = R600DoneSolid;
2290
2291    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2292    info->accel_state->exa->Copy = R600Copy;
2293    info->accel_state->exa->DoneCopy = R600DoneCopy;
2294
2295    info->accel_state->exa->MarkSync = R600MarkSync;
2296    info->accel_state->exa->WaitMarker = R600Sync;
2297
2298#ifdef XF86DRM_MODE
2299#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2300    if (info->cs) {
2301	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2302	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2303	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2304	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2305	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2306	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2307	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2308    } else
2309#endif
2310#endif
2311    {
2312	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2313	info->accel_state->exa->FinishAccess = R600FinishAccess;
2314
2315	/* AGP seems to have problems with gart transfers */
2316	if (info->accelDFS) {
2317	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2318	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2319	}
2320    }
2321
2322    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2323#ifdef EXA_SUPPORTS_PREPARE_AUX
2324    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2325#endif
2326
2327#ifdef XF86DRM_MODE
2328#ifdef EXA_HANDLES_PIXMAPS
2329    if (info->cs) {
2330	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2331#ifdef EXA_MIXED_PIXMAPS
2332	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2333#endif
2334    }
2335#endif
2336#endif
2337    info->accel_state->exa->pixmapOffsetAlign = 256;
2338    info->accel_state->exa->pixmapPitchAlign = 256;
2339
2340    info->accel_state->exa->CheckComposite = R600CheckComposite;
2341    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2342    info->accel_state->exa->Composite = R600Composite;
2343    info->accel_state->exa->DoneComposite = R600DoneComposite;
2344
2345#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2346    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2347
2348    info->accel_state->exa->maxPitchBytes = 32768;
2349    info->accel_state->exa->maxX = 8192;
2350#else
2351    info->accel_state->exa->maxX = 8192;
2352#endif
2353    info->accel_state->exa->maxY = 8192;
2354
2355    /* not supported yet */
2356    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2357	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2358	info->accel_state->vsync = TRUE;
2359    } else
2360	info->accel_state->vsync = FALSE;
2361
2362    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2363	free(info->accel_state->exa);
2364	return FALSE;
2365    }
2366
2367#ifdef XF86DRM_MODE
2368#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2369    if (!info->cs)
2370#endif
2371#endif
2372	if (!info->gartLocation)
2373	    return FALSE;
2374
2375    info->accel_state->XInited3D = FALSE;
2376    info->accel_state->copy_area = NULL;
2377    info->accel_state->src_obj[0].bo = NULL;
2378    info->accel_state->src_obj[1].bo = NULL;
2379    info->accel_state->dst_obj.bo = NULL;
2380    info->accel_state->copy_area_bo = NULL;
2381    info->accel_state->vb_start_op = -1;
2382    R600VlineHelperClear(pScrn);
2383
2384#ifdef XF86DRM_MODE
2385    radeon_vbo_init_lists(pScrn);
2386#endif
2387
2388    if (!R600AllocShaders(pScrn, pScreen))
2389	return FALSE;
2390
2391    if (!R600LoadShaders(pScrn))
2392	return FALSE;
2393
2394    exaMarkSync(pScreen);
2395
2396    return TRUE;
2397
2398}
2399
2400