evergreen_exa.c revision de2362d3
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_reg.h"
37#include "evergreen_shader.h"
38#include "evergreen_reg.h"
39#include "evergreen_state.h"
40#include "radeon_exa_shared.h"
41#include "radeon_vbo.h"
42
/*
 * Cayman shader entry points, defined in another translation unit.
 * Judging by the names there is one vertex/pixel shader pair per operation
 * (solid fill, copy, Xv, composite).  Each takes the chip family and a
 * uint32_t buffer; the meaning of the int return value is not visible from
 * this file -- presumably a shader size, TODO confirm at the definitions.
 */
extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);

extern int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);

extern int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
extern int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);

extern int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
54
/*
 * Set up GPU state for a run of solid fills into pPix.
 *
 * pPix: destination pixmap.
 * alu:  raster operation, handed to R600SetAccelState(); the code below reads
 *       it back through accel_state->rop (presumably stored by that call).
 * pm:   planemask, handed to R600SetAccelState() and then translated into the
 *       render target's per-channel write mask via accel_state->planemask.
 * fg:   fill colour packed in the pixmap's pixel layout; it is unpacked and
 *       uploaded to the pixel shader as four normalized floats (R, G, B, A).
 *
 * Returns TRUE once all state has been emitted.  Returns FALSE when
 * R600SetAccelState() fails; the RADEON_FALLBACK() paths (macro defined
 * elsewhere, expected to return FALSE) reject unsupported depths and
 * planemasks.
 *
 * The pixmap and colour are remembered in accel_state so EVERGREENSolid()
 * can re-run this function after a command-stream flush.
 */
static Bool
EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    cb_config_t     cb_conf;
    shader_config_t vs_conf, ps_conf;
    uint32_t a, r, g, b;
    float *ps_alu_consts;
    const_config_t ps_const_conf;
    struct r600_accel_object dst;


    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
	RADEON_FALLBACK(("invalid planemask\n"));

    /* Describe the destination surface for R600SetAccelState(). */
    dst.bo = radeon_get_pixmap_bo(pPix);
    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
    dst.surface = radeon_get_pixmap_surface(pPix);

    /* exaGetPixmapPitch() is in bytes; the accel object wants pixels. */
    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
    dst.width = pPix->drawable.width;
    dst.height = pPix->drawable.height;
    dst.bpp = pPix->drawable.bitsPerPixel;
    dst.domain = RADEON_GEM_DOMAIN_VRAM;

    /* No source objects for a solid fill: only the destination is passed. */
    if (!R600SetAccelState(pScrn,
			   NULL,
			   NULL,
			   &dst,
			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
			   alu, pm))
	return FALSE;

    CLEAR (cb_conf);
    CLEAR (vs_conf);
    CLEAR (ps_conf);
    CLEAR (ps_const_conf);

    /* Make sure the vertex and constant buffers have room before emitting state. */
    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
    radeon_cp_start(pScrn);

    evergreen_set_default_state(pScrn);

    /* All three scissors are opened to the full destination size. */
    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);

    /* Shader */
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
    vs_conf.shader_size         = accel_state->vs_size;
    vs_conf.num_gprs            = 2;
    vs_conf.stack_size          = 0;
    vs_conf.bo                  = accel_state->shaders_bo;
    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);

    ps_conf.shader_addr         = accel_state->ps_mc_addr;
    ps_conf.shader_size         = accel_state->ps_size;
    ps_conf.num_gprs            = 1;
    ps_conf.stack_size          = 0;
    ps_conf.clamp_consts        = 0;
    ps_conf.export_mode         = 2;
    ps_conf.bo                  = accel_state->shaders_bo;
    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);

    /* Render target: colour buffer 0 bound to the destination object. */
    cb_conf.id = 0;
    cb_conf.w = accel_state->dst_obj.pitch;
    cb_conf.h = accel_state->dst_obj.height;
    cb_conf.base = 0;
    cb_conf.bo = accel_state->dst_obj.bo;
    cb_conf.surface = accel_state->dst_obj.surface;

    /* Pick the colour-buffer format (and byte swap on big-endian hosts) from the depth. */
    if (accel_state->dst_obj.bpp == 8) {
	cb_conf.format = COLOR_8;
	cb_conf.comp_swap = 3; /* A */
    } else if (accel_state->dst_obj.bpp == 16) {
	cb_conf.format = COLOR_5_6_5;
	cb_conf.comp_swap = 2; /* RGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
	cb_conf.endian = ENDIAN_8IN16;
#endif
    } else {
	cb_conf.format = COLOR_8_8_8_8;
	cb_conf.comp_swap = 1; /* ARGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
	cb_conf.endian = ENDIAN_8IN32;
#endif
    }
    cb_conf.source_format = EXPORT_4C_16BPC;
    cb_conf.blend_clamp = 1;
    /* Render setup */
    /* Translate the planemask, one byte lane at a time, into channel write-enable bits. */
    if (accel_state->planemask & 0x000000ff)
	cb_conf.pmask |= 4; /* B */
    if (accel_state->planemask & 0x0000ff00)
	cb_conf.pmask |= 2; /* G */
    if (accel_state->planemask & 0x00ff0000)
	cb_conf.pmask |= 1; /* R */
    if (accel_state->planemask & 0xff000000)
	cb_conf.pmask |= 8; /* A */
    cb_conf.rop = accel_state->rop;
    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
	RADEON_TILING_LINEAR) {
	cb_conf.array_mode = 0;
	cb_conf.non_disp_tiling = 1;
    }
    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);

    evergreen_set_spi(pScrn, 0, 0);

    /* PS alu constants */
    /* A 256-byte slot is reserved in the constant buffer; only the first four floats are written. */
    ps_const_conf.size_bytes = 256;
    ps_const_conf.type = SHADER_TYPE_PS;
    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
    ps_const_conf.bo = accel_state->cbuf.vb_bo;
    ps_const_conf.const_addr = accel_state->cbuf.vb_offset;
    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
    if (accel_state->dst_obj.bpp == 16) {
	/* 5:6:5 layout: scale each field by its maximum value; alpha is forced to 1. */
	r = (fg >> 11) & 0x1f;
	g = (fg >> 5) & 0x3f;
	b = (fg >> 0) & 0x1f;
	ps_alu_consts[0] = (float)r / 31; /* R */
	ps_alu_consts[1] = (float)g / 63; /* G */
	ps_alu_consts[2] = (float)b / 31; /* B */
	ps_alu_consts[3] = 1.0; /* A */
    } else if (accel_state->dst_obj.bpp == 8) {
	/* 8bpp: the single byte is carried in the alpha component; RGB are zero. */
	a = (fg >> 0) & 0xff;
	ps_alu_consts[0] = 0.0; /* R */
	ps_alu_consts[1] = 0.0; /* G */
	ps_alu_consts[2] = 0.0; /* B */
	ps_alu_consts[3] = (float)a / 255; /* A */
    } else {
	/* Remaining depths are unpacked as 8:8:8:8 with alpha in the top byte. */
	a = (fg >> 24) & 0xff;
	r = (fg >> 16) & 0xff;
	g = (fg >> 8) & 0xff;
	b = (fg >> 0) & 0xff;
	ps_alu_consts[0] = (float)r / 255; /* R */
	ps_alu_consts[1] = (float)g / 255; /* G */
	ps_alu_consts[2] = (float)b / 255; /* B */
	ps_alu_consts[3] = (float)a / 255; /* A */
    }
    radeon_vbo_commit(pScrn, &accel_state->cbuf);
    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);

    if (accel_state->vsync)
	RADEONVlineHelperClear(pScrn);

    /* Remembered so EVERGREENSolid() can re-prepare after a flush. */
    accel_state->dst_pix = pPix;
    accel_state->fg = fg;

    return TRUE;
}
210
/*
 * Finish a run of solid fills on pPix.
 *
 * When vsync is enabled, a wait on the recorded vline range
 * (vline_crtc / vline_y1 / vline_y2 in accel_state) is emitted first.
 * evergreen_finish_op() is then called with 8, which matches the size
 * EVERGREENSolid() requests from the vertex buffer for each fill.
 */
static void
EVERGREENDoneSolid(PixmapPtr pPix)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->vsync)
	evergreen_cp_wait_vline_sync(pScrn, pPix,
				     accel_state->vline_crtc,
				     accel_state->vline_y1,
				     accel_state->vline_y2);

    evergreen_finish_op(pScrn, 8);
}
226
227static void
228EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
229{
230    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
231    RADEONInfoPtr info = RADEONPTR(pScrn);
232    struct radeon_accel_state *accel_state = info->accel_state;
233    float *vb;
234
235    if (CS_FULL(info->cs)) {
236	EVERGREENDoneSolid(info->accel_state->dst_pix);
237	radeon_cs_flush_indirect(pScrn);
238	EVERGREENPrepareSolid(accel_state->dst_pix,
239			      accel_state->rop,
240			      accel_state->planemask,
241			      accel_state->fg);
242    }
243
244    if (accel_state->vsync)
245	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
246
247    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
248
249    vb[0] = (float)x1;
250    vb[1] = (float)y1;
251
252    vb[2] = (float)x1;
253    vb[3] = (float)y2;
254
255    vb[4] = (float)x2;
256    vb[5] = (float)y2;
257
258    radeon_vbo_commit(pScrn, &accel_state->vbo);
259}
260
/*
 * Emit the GPU state for a copy from accel_state->src_obj[0] to
 * accel_state->dst_obj.
 *
 * Everything is taken from accel_state, so callers (EVERGREENPrepareCopy()
 * and EVERGREENCopy()) may override individual src/dst fields before calling.
 * State is emitted in this order: default state, scissors, vertex/pixel
 * shaders, source texture, sampler, render target, SPI setup.
 */
static void
EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    cb_config_t     cb_conf;
    tex_resource_t  tex_res;
    tex_sampler_t   tex_samp;
    shader_config_t vs_conf, ps_conf;

    CLEAR (cb_conf);
    CLEAR (tex_res);
    CLEAR (tex_samp);
    CLEAR (vs_conf);
    CLEAR (ps_conf);

    /* Make sure the vertex buffer has room before emitting state. */
    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
    radeon_cp_start(pScrn);

    evergreen_set_default_state(pScrn);

    /* All three scissors are opened to the full destination size. */
    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);

    /* Shader */
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
    vs_conf.shader_size         = accel_state->vs_size;
    vs_conf.num_gprs            = 2;
    vs_conf.stack_size          = 0;
    vs_conf.bo                  = accel_state->shaders_bo;
    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);

    ps_conf.shader_addr         = accel_state->ps_mc_addr;
    ps_conf.shader_size         = accel_state->ps_size;
    ps_conf.num_gprs            = 1;
    ps_conf.stack_size          = 0;
    ps_conf.clamp_consts        = 0;
    ps_conf.export_mode         = 2;
    ps_conf.bo                  = accel_state->shaders_bo;
    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);

    /* Texture */
    /* The source object is bound as 2D texture 0 with a single mip level. */
    tex_res.id                  = 0;
    tex_res.w                   = accel_state->src_obj[0].width;
    tex_res.h                   = accel_state->src_obj[0].height;
    tex_res.pitch               = accel_state->src_obj[0].pitch;
    tex_res.depth               = 0;
    tex_res.dim                 = SQ_TEX_DIM_2D;
    tex_res.base                = 0;
    tex_res.mip_base            = 0;
    tex_res.size                = accel_state->src_size[0];
    tex_res.bo                  = accel_state->src_obj[0].bo;
    tex_res.mip_bo              = accel_state->src_obj[0].bo;
    tex_res.surface             = accel_state->src_obj[0].surface;
    /* Texture format and channel swizzle follow the source depth. */
    if (accel_state->src_obj[0].bpp == 8) {
	tex_res.format              = FMT_8;
	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
    } else if (accel_state->src_obj[0].bpp == 16) {
	tex_res.format              = FMT_5_6_5;
	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
    } else {
	tex_res.format              = FMT_8_8_8_8;
	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
    }

    tex_res.base_level          = 0;
    tex_res.last_level          = 0;
    tex_res.perf_modulation     = 0;
    if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
	RADEON_TILING_LINEAR)
	tex_res.array_mode          = 0;
    evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

    /* Sampler 0: point filtering in both directions, x/y clamped to the last texel. */
    tex_samp.id                 = 0;
    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
    tex_samp.clamp_z            = SQ_TEX_WRAP;
    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
    tex_samp.mc_coord_truncate  = 1;
    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
    tex_samp.mip_filter         = 0;			/* no mipmap */
    evergreen_set_tex_sampler   (pScrn, &tex_samp);

    /* Render target: colour buffer 0 bound to the destination object. */
    cb_conf.id = 0;
    cb_conf.w = accel_state->dst_obj.pitch;
    cb_conf.h = accel_state->dst_obj.height;
    cb_conf.base = 0;
    cb_conf.bo = accel_state->dst_obj.bo;
    cb_conf.surface = accel_state->dst_obj.surface;
    /* NOTE(review): unlike EVERGREENPrepareSolid(), no cb_conf.endian is set here for big-endian hosts. */
    if (accel_state->dst_obj.bpp == 8) {
	cb_conf.format = COLOR_8;
	cb_conf.comp_swap = 3; /* A */
    } else if (accel_state->dst_obj.bpp == 16) {
	cb_conf.format = COLOR_5_6_5;
	cb_conf.comp_swap = 2; /* RGB */
    } else {
	cb_conf.format = COLOR_8_8_8_8;
	cb_conf.comp_swap = 1; /* ARGB */
    }
    cb_conf.source_format = EXPORT_4C_16BPC;
    cb_conf.blend_clamp = 1;
    /* Render setup */
    /* Translate the planemask, one byte lane at a time, into channel write-enable bits. */
    if (accel_state->planemask & 0x000000ff)
	cb_conf.pmask |= 4; /* B */
    if (accel_state->planemask & 0x0000ff00)
	cb_conf.pmask |= 2; /* G */
    if (accel_state->planemask & 0x00ff0000)
	cb_conf.pmask |= 1; /* R */
    if (accel_state->planemask & 0xff000000)
	cb_conf.pmask |= 8; /* A */
    cb_conf.rop = accel_state->rop;
    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
	RADEON_TILING_LINEAR) {
	cb_conf.array_mode = 0;
	cb_conf.non_disp_tiling = 1;
    }
    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);

    /* NOTE(review): the arguments presumably describe one interpolated (texcoord) input -- confirm in evergreen_set_spi(). */
    evergreen_set_spi(pScrn, (1 - 1), 1);

}
393
/*
 * Finish the currently queued copy without any vline wait.
 * The 16 passed to evergreen_finish_op() matches the size
 * EVERGREENAppendCopyVertex() requests from the vertex buffer.
 */
static void
EVERGREENDoCopy(ScrnInfoPtr pScrn)
{
    evergreen_finish_op(pScrn, 16);
}
399
/*
 * Finish the currently queued copy into pPix.
 *
 * Same as EVERGREENDoCopy(), except that when vsync is enabled a wait on the
 * recorded vline range (vline_crtc / vline_y1 / vline_y2 in accel_state) is
 * emitted before evergreen_finish_op().
 */
static void
EVERGREENDoCopyVline(PixmapPtr pPix)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->vsync)
	evergreen_cp_wait_vline_sync(pScrn, pPix,
				     accel_state->vline_crtc,
				     accel_state->vline_y1,
				     accel_state->vline_y2);

    evergreen_finish_op(pScrn, 16);
}
415
416static void
417EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
418			  int srcX, int srcY,
419			  int dstX, int dstY,
420			  int w, int h)
421{
422    RADEONInfoPtr info = RADEONPTR(pScrn);
423    struct radeon_accel_state *accel_state = info->accel_state;
424    float *vb;
425
426    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
427
428    vb[0] = (float)dstX;
429    vb[1] = (float)dstY;
430    vb[2] = (float)srcX;
431    vb[3] = (float)srcY;
432
433    vb[4] = (float)dstX;
434    vb[5] = (float)(dstY + h);
435    vb[6] = (float)srcX;
436    vb[7] = (float)(srcY + h);
437
438    vb[8] = (float)(dstX + w);
439    vb[9] = (float)(dstY + h);
440    vb[10] = (float)(srcX + w);
441    vb[11] = (float)(srcY + h);
442
443    radeon_vbo_commit(pScrn, &accel_state->vbo);
444}
445
446static Bool
447EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
448		     int xdir, int ydir,
449		     int rop,
450		     Pixel planemask)
451{
452    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
453    RADEONInfoPtr info = RADEONPTR(pScrn);
454    struct radeon_accel_state *accel_state = info->accel_state;
455    struct r600_accel_object src_obj, dst_obj;
456
457    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
458	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
459    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
460	RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n"));
461    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
462	RADEON_FALLBACK(("Invalid planemask\n"));
463
464    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
465    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
466
467    accel_state->same_surface = FALSE;
468
469    src_obj.bo = radeon_get_pixmap_bo(pSrc);
470    dst_obj.bo = radeon_get_pixmap_bo(pDst);
471    dst_obj.surface = radeon_get_pixmap_surface(pDst);
472    src_obj.surface = radeon_get_pixmap_surface(pSrc);
473    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
474    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
475    if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
476	accel_state->same_surface = TRUE;
477
478    src_obj.width = pSrc->drawable.width;
479    src_obj.height = pSrc->drawable.height;
480    src_obj.bpp = pSrc->drawable.bitsPerPixel;
481    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
482
483    dst_obj.width = pDst->drawable.width;
484    dst_obj.height = pDst->drawable.height;
485    dst_obj.bpp = pDst->drawable.bitsPerPixel;
486    if (radeon_get_pixmap_shared(pDst) == TRUE)
487	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
488    else
489	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
490
491    if (!R600SetAccelState(pScrn,
492			   &src_obj,
493			   NULL,
494			   &dst_obj,
495			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
496			   rop, planemask))
497	return FALSE;
498
499    if (accel_state->same_surface == TRUE) {
500	unsigned height = RADEON_ALIGN(pDst->drawable.height,
501				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
502	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
503
504	if (accel_state->dst_obj.surface)
505		size = accel_state->dst_obj.surface->bo_size;
506
507	if (accel_state->copy_area_bo) {
508	    radeon_bo_unref(accel_state->copy_area_bo);
509	    accel_state->copy_area_bo = NULL;
510	}
511	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
512						   RADEON_GEM_DOMAIN_VRAM,
513						   0);
514	if (accel_state->copy_area_bo == NULL)
515	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
516
517	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
518					  0, RADEON_GEM_DOMAIN_VRAM);
519	if (radeon_cs_space_check(info->cs)) {
520	    radeon_bo_unref(accel_state->copy_area_bo);
521	    accel_state->copy_area_bo = NULL;
522	    return FALSE;
523	}
524	accel_state->copy_area = (void*)accel_state->copy_area_bo;
525    } else
526	EVERGREENDoPrepareCopy(pScrn);
527
528    if (accel_state->vsync)
529	RADEONVlineHelperClear(pScrn);
530
531    accel_state->dst_pix = pDst;
532    accel_state->src_pix = pSrc;
533    accel_state->xdir = xdir;
534    accel_state->ydir = ydir;
535
536    return TRUE;
537}
538
539static void
540EVERGREENDoneCopy(PixmapPtr pDst)
541{
542    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
543    RADEONInfoPtr info = RADEONPTR(pScrn);
544    struct radeon_accel_state *accel_state = info->accel_state;
545
546    if (!accel_state->same_surface)
547	EVERGREENDoCopyVline(pDst);
548
549    if (accel_state->copy_area)
550	accel_state->copy_area = NULL;
551
552}
553
/*
 * Copy a w x h rectangle from (srcX, srcY) to (dstX, dstY).
 *
 * Source and destination pixmaps come from the preceding
 * EVERGREENPrepareCopy() call.  Four cases are handled:
 *   - same surface, identical coordinates: nothing to do;
 *   - same surface, non-overlapping rectangles: one self-contained copy;
 *   - same surface, overlapping rectangles: the data is bounced through the
 *     scratch buffer (copy_area_bo) in two self-contained copies;
 *   - otherwise: the rectangle is appended to the batch that
 *     EVERGREENPrepareCopy() set up and EVERGREENDoneCopy() finishes.
 */
static void
EVERGREENCopy(PixmapPtr pDst,
	      int srcX, int srcY,
	      int dstX, int dstY,
	      int w, int h)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    /* Copying a region onto itself is a no-op. */
    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
	return;

    /*
     * Command stream full: finish and flush what is queued, then rebuild the
     * copy state from the saved parameters.
     * NOTE(review): the return value of EVERGREENPrepareCopy() is ignored here.
     */
    if (CS_FULL(info->cs)) {
	EVERGREENDoneCopy(info->accel_state->dst_pix);
	radeon_cs_flush_indirect(pScrn);
	EVERGREENPrepareCopy(accel_state->src_pix,
			     accel_state->dst_pix,
			     accel_state->xdir,
			     accel_state->ydir,
			     accel_state->rop,
			     accel_state->planemask);
    }

    if (accel_state->vsync)
	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);

    /* Same surface but the rectangles are disjoint on at least one axis: copy directly. */
    if (accel_state->same_surface &&
	    (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
	EVERGREENDoPrepareCopy(pScrn);
	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
	EVERGREENDoCopyVline(pDst);
    } else if (accel_state->same_surface && accel_state->copy_area) {
	/* Overlapping rectangles: save the fields that are overridden below. */
	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
	int orig_rop = accel_state->rop;
	struct radeon_surface *orig_dst_surface = accel_state->dst_obj.surface;
	struct radeon_surface *orig_src_surface = accel_state->src_obj[0].surface;

	/* src to tmp */
	/* The scratch buffer is used untiled, without a surface, and with rop 3 (GXcopy in X11). */
	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
	accel_state->dst_obj.bo = accel_state->copy_area_bo;
	accel_state->dst_obj.tiling_flags = 0;
	accel_state->rop = 3;
	accel_state->dst_obj.surface = NULL;
	EVERGREENDoPrepareCopy(pScrn);
	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
	EVERGREENDoCopy(pScrn);

	/* tmp to dst */
	/* The data already sits at (dstX, dstY) in the scratch buffer, so source and destination coordinates match. */
	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
	accel_state->src_obj[0].tiling_flags = 0;
	accel_state->src_obj[0].surface = NULL;
	accel_state->dst_obj.domain = orig_dst_domain;
	accel_state->dst_obj.bo = orig_bo;
	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
	accel_state->rop = orig_rop;
	accel_state->dst_obj.surface = orig_dst_surface;
	EVERGREENDoPrepareCopy(pScrn);
	EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
	EVERGREENDoCopyVline(pDst);

	/* restore state */
	/* orig_bo was read from dst_obj; in this branch source and destination share that buffer object. */
	accel_state->src_obj[0].domain = orig_src_domain;
	accel_state->src_obj[0].bo = orig_bo;
	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
	accel_state->src_obj[0].surface = orig_src_surface;
    } else
	/* Remaining case: queue the rectangle on the state already emitted. */
	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);

}
629
/* One entry of the blend table: how a composite operator blends. */
struct blendinfo {
    Bool dst_alpha;		/* blend factors reference destination alpha */
    Bool src_alpha;		/* blend factors reference source alpha */
    uint32_t blend_cntl;	/* source and destination colour blend factor fields */
};
635
/*
 * Blend settings per composite operator, indexed by the operator number
 * (see EVERGREENGetBlendCntl()).  The entry order follows the per-entry
 * comments and presumably matches the Render PictOp values (PictOpClear
 * first, PictOpAdd last) -- it must not be reordered.
 */
static struct blendinfo EVERGREENBlendOp[] = {
    /* Clear */
    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
    /* Src */
    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
    /* Dst */
    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
    /* Over */
    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* OverReverse */
    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
    /* In */
    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
    /* InReverse */
    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* Out */
    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
    /* OutReverse */
    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* Atop */
    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* AtopReverse */
    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* Xor */
    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
    /* Add */
    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
};
664
/* Maps a Render picture format to the hardware texture format used for it. */
struct formatinfo {
    unsigned int fmt;		/* Render picture format (PICT_*) */
    uint32_t card_fmt;		/* hardware texture format (FMT_*) */
};
669
/*
 * Picture formats accepted as textures.  The table is searched linearly by
 * picture format; a format missing from it makes
 * EVERGREENCheckCompositeTexture() fall back.
 */
static struct formatinfo EVERGREENTexFormats[] = {
    {PICT_a8r8g8b8,	FMT_8_8_8_8},
    {PICT_x8r8g8b8,	FMT_8_8_8_8},
    {PICT_a8b8g8r8,	FMT_8_8_8_8},
    {PICT_x8b8g8r8,	FMT_8_8_8_8},
    {PICT_b8g8r8a8,	FMT_8_8_8_8},
    {PICT_b8g8r8x8,	FMT_8_8_8_8},
    {PICT_r5g6b5,	FMT_5_6_5},
    {PICT_a1r5g5b5,	FMT_1_5_5_5},
    {PICT_x1r5g5b5,     FMT_1_5_5_5},
    {PICT_a8,		FMT_8},
};
682
683static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
684{
685    uint32_t sblend, dblend;
686
687    sblend = EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
688    dblend = EVERGREENBlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
689
690    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
691     * it as always 1.
692     */
693    if (PICT_FORMAT_A(dst_format) == 0 && EVERGREENBlendOp[op].dst_alpha) {
694	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
695	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
696	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
697	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
698    }
699
700    /* If the source alpha is being used, then we should only be in a case where
701     * the source blend factor is 0, and the source blend value is the mask
702     * channels multiplied by the source picture's alpha.
703     */
704    if (pMask && pMask->componentAlpha && EVERGREENBlendOp[op].src_alpha) {
705	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
706	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
707	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
708	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
709	}
710
711	/* With some tricks, we can still accelerate PictOpOver with solid src.
712	 * This is commonly used for text rendering, so it's worth the extra
713	 * effort.
714	 */
715	if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) {
716	    sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);
717	}
718    }
719
720    return sblend | dblend;
721}
722
723static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
724{
725    switch (pDstPicture->format) {
726    case PICT_a8r8g8b8:
727    case PICT_x8r8g8b8:
728    case PICT_a8b8g8r8:
729    case PICT_x8b8g8r8:
730    case PICT_b8g8r8a8:
731    case PICT_b8g8r8x8:
732	*dst_format = COLOR_8_8_8_8;
733	break;
734    case PICT_r5g6b5:
735	*dst_format = COLOR_5_6_5;
736	break;
737    case PICT_a1r5g5b5:
738    case PICT_x1r5g5b5:
739	*dst_format = COLOR_1_5_5_5;
740	break;
741    case PICT_a8:
742	*dst_format = COLOR_8;
743	break;
744    default:
745	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
746	       (int)pDstPicture->format));
747    }
748    return TRUE;
749}
750
751static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
752					   PicturePtr pDstPict,
753					   int op,
754					   int unit)
755{
756    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
757    unsigned int i;
758
759    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
760	if (EVERGREENTexFormats[i].fmt == pPict->format)
761	    break;
762    }
763    if (i == sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]))
764	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
765			 (int)pPict->format));
766
767    if (pPict->filter != PictFilterNearest &&
768	pPict->filter != PictFilterBilinear)
769	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
770
771    /* for REPEAT_NONE, Render semantics are that sampling outside the source
772     * picture results in alpha=0 pixels. We can implement this with a border color
773     * *if* our source texture has an alpha channel, otherwise we need to fall
774     * back. If we're not transformed then we hope that upper layers have clipped
775     * rendering to the bounds of the source drawable, in which case it doesn't
776     * matter. I have not, however, verified that the X server always does such
777     * clipping.
778     */
779    /* FIXME evergreen */
780    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
781	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
782	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
783    }
784
785    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
786	RADEON_FALLBACK(("non-affine transforms not supported\n"));
787
788    return TRUE;
789}
790
791static void EVERGREENXFormSetup(PicturePtr pPict, ScrnInfoPtr pScrn,
792				int unit, float *vs_alu_consts)
793{
794    RADEONInfoPtr info = RADEONPTR(pScrn);
795    struct radeon_accel_state *accel_state = info->accel_state;
796    int const_offset = unit * 8;
797    int w, h;
798
799    if (pPict->pDrawable) {
800	w = pPict->pDrawable->width;
801	h = pPict->pDrawable->height;
802    } else {
803	w = 1;
804	h = 1;
805    }
806
807    if (pPict->transform != 0) {
808	accel_state->is_transform[unit] = TRUE;
809	accel_state->transform[unit] = pPict->transform;
810
811	vs_alu_consts[0 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][0]);
812	vs_alu_consts[1 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][1]);
813	vs_alu_consts[2 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][2]);
814	vs_alu_consts[3 + const_offset] = 1.0 / w;
815
816	vs_alu_consts[4 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][0]);
817	vs_alu_consts[5 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][1]);
818	vs_alu_consts[6 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][2]);
819	vs_alu_consts[7 + const_offset] = 1.0 / h;
820    } else {
821	accel_state->is_transform[unit] = FALSE;
822
823	vs_alu_consts[0 + const_offset] = 1.0;
824	vs_alu_consts[1 + const_offset] = 0.0;
825	vs_alu_consts[2 + const_offset] = 0.0;
826	vs_alu_consts[3 + const_offset] = 1.0 / w;
827
828	vs_alu_consts[4 + const_offset] = 0.0;
829	vs_alu_consts[5 + const_offset] = 1.0;
830	vs_alu_consts[6 + const_offset] = 0.0;
831	vs_alu_consts[7 + const_offset] = 1.0 / h;
832    }
833
834}
835
836static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
837				  int unit)
838{
839    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
840    RADEONInfoPtr info = RADEONPTR(pScrn);
841    struct radeon_accel_state *accel_state = info->accel_state;
842    unsigned int repeatType;
843    unsigned int i;
844    tex_resource_t  tex_res;
845    tex_sampler_t   tex_samp;
846    int pix_r, pix_g, pix_b, pix_a;
847
848    CLEAR (tex_res);
849    CLEAR (tex_samp);
850
851    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
852	if (EVERGREENTexFormats[i].fmt == pPict->format)
853	    break;
854    }
855
856    /* Texture */
857    if (pPict->pDrawable) {
858	tex_res.w               = pPict->pDrawable->width;
859	tex_res.h               = pPict->pDrawable->height;
860	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
861    } else {
862	tex_res.w               = 1;
863	tex_res.h               = 1;
864	repeatType              = RepeatNormal;
865    }
866
867    tex_res.id                  = unit;
868    tex_res.pitch               = accel_state->src_obj[unit].pitch;
869    tex_res.depth               = 0;
870    tex_res.dim                 = SQ_TEX_DIM_2D;
871    tex_res.base                = 0;
872    tex_res.mip_base            = 0;
873    tex_res.size                = accel_state->src_size[unit];
874    tex_res.format              = EVERGREENTexFormats[i].card_fmt;
875    tex_res.bo                  = accel_state->src_obj[unit].bo;
876    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
877    tex_res.surface             = accel_state->src_obj[unit].surface;
878
879#if X_BYTE_ORDER == X_BIG_ENDIAN
880    switch (accel_state->src_obj[unit].bpp) {
881    case 16:
882	tex_res.endian = SQ_ENDIAN_8IN16;
883	break;
884    case 32:
885	tex_res.endian = SQ_ENDIAN_8IN32;
886	break;
887    default :
888	break;
889    }
890#endif
891
892    /* component swizzles */
893    switch (pPict->format) {
894    case PICT_a1r5g5b5:
895    case PICT_a8r8g8b8:
896	pix_r = SQ_SEL_Z; /* R */
897	pix_g = SQ_SEL_Y; /* G */
898	pix_b = SQ_SEL_X; /* B */
899	pix_a = SQ_SEL_W; /* A */
900	break;
901    case PICT_a8b8g8r8:
902	pix_r = SQ_SEL_X; /* R */
903	pix_g = SQ_SEL_Y; /* G */
904	pix_b = SQ_SEL_Z; /* B */
905	pix_a = SQ_SEL_W; /* A */
906	break;
907    case PICT_x8b8g8r8:
908	pix_r = SQ_SEL_X; /* R */
909	pix_g = SQ_SEL_Y; /* G */
910	pix_b = SQ_SEL_Z; /* B */
911	pix_a = SQ_SEL_1; /* A */
912	break;
913    case PICT_b8g8r8a8:
914	pix_r = SQ_SEL_Y; /* R */
915	pix_g = SQ_SEL_Z; /* G */
916	pix_b = SQ_SEL_W; /* B */
917	pix_a = SQ_SEL_X; /* A */
918	break;
919    case PICT_b8g8r8x8:
920	pix_r = SQ_SEL_Y; /* R */
921	pix_g = SQ_SEL_Z; /* G */
922	pix_b = SQ_SEL_W; /* B */
923	pix_a = SQ_SEL_1; /* A */
924	break;
925    case PICT_x1r5g5b5:
926    case PICT_x8r8g8b8:
927    case PICT_r5g6b5:
928	pix_r = SQ_SEL_Z; /* R */
929	pix_g = SQ_SEL_Y; /* G */
930	pix_b = SQ_SEL_X; /* B */
931	pix_a = SQ_SEL_1; /* A */
932	break;
933    case PICT_a8:
934	pix_r = SQ_SEL_0; /* R */
935	pix_g = SQ_SEL_0; /* G */
936	pix_b = SQ_SEL_0; /* B */
937	pix_a = SQ_SEL_X; /* A */
938	break;
939    default:
940	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
941    }
942
943    if (unit == 0) {
944	if (!accel_state->msk_pic) {
945	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
946		pix_r = SQ_SEL_0;
947		pix_g = SQ_SEL_0;
948		pix_b = SQ_SEL_0;
949	    }
950
951	    if (PICT_FORMAT_A(pPict->format) == 0)
952		pix_a = SQ_SEL_1;
953	} else {
954	    if (accel_state->component_alpha) {
955		if (accel_state->src_alpha) {
956		    if (PICT_FORMAT_A(pPict->format) == 0) {
957			pix_r = SQ_SEL_1;
958			pix_g = SQ_SEL_1;
959			pix_b = SQ_SEL_1;
960			pix_a = SQ_SEL_1;
961		    } else {
962			pix_r = pix_a;
963			pix_g = pix_a;
964			pix_b = pix_a;
965		    }
966		} else {
967		    if (PICT_FORMAT_A(pPict->format) == 0)
968			pix_a = SQ_SEL_1;
969		}
970	    } else {
971		if (PICT_FORMAT_RGB(pPict->format) == 0) {
972		    pix_r = SQ_SEL_0;
973		    pix_g = SQ_SEL_0;
974		    pix_b = SQ_SEL_0;
975		}
976
977		if (PICT_FORMAT_A(pPict->format) == 0)
978		    pix_a = SQ_SEL_1;
979	    }
980	}
981    } else {
982	if (accel_state->component_alpha) {
983	    if (PICT_FORMAT_A(pPict->format) == 0)
984		pix_a = SQ_SEL_1;
985	} else {
986	    if (PICT_FORMAT_A(pPict->format) == 0) {
987		pix_r = SQ_SEL_1;
988		pix_g = SQ_SEL_1;
989		pix_b = SQ_SEL_1;
990		pix_a = SQ_SEL_1;
991	    } else {
992		pix_r = pix_a;
993		pix_g = pix_a;
994		pix_b = pix_a;
995	    }
996	}
997    }
998
999    tex_res.dst_sel_x           = pix_r; /* R */
1000    tex_res.dst_sel_y           = pix_g; /* G */
1001    tex_res.dst_sel_z           = pix_b; /* B */
1002    tex_res.dst_sel_w           = pix_a; /* A */
1003
1004    tex_res.base_level          = 0;
1005    tex_res.last_level          = 0;
1006    tex_res.perf_modulation     = 0;
1007    if ((accel_state->src_obj[unit].tiling_flags & RADEON_TILING_MASK) ==
1008	RADEON_TILING_LINEAR)
1009	tex_res.array_mode          = 0;
1010    evergreen_set_tex_resource  (pScrn, &tex_res, accel_state->src_obj[unit].domain);
1011
1012    tex_samp.id                 = unit;
1013    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1014
1015    switch (repeatType) {
1016    case RepeatNormal:
1017	tex_samp.clamp_x            = SQ_TEX_WRAP;
1018	tex_samp.clamp_y            = SQ_TEX_WRAP;
1019	break;
1020    case RepeatPad:
1021	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1022	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1023	break;
1024    case RepeatReflect:
1025	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1026	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1027	break;
1028    case RepeatNone:
1029	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1030	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1031	break;
1032    default:
1033	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1034    }
1035
1036    switch (pPict->filter) {
1037    case PictFilterNearest:
1038	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1039	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1040	tex_samp.mc_coord_truncate  = 1;
1041	break;
1042    case PictFilterBilinear:
1043	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1044	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1045	break;
1046    default:
1047	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1048    }
1049
1050    tex_samp.clamp_z            = SQ_TEX_WRAP;
1051    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1052    tex_samp.mip_filter         = 0;			/* no mipmap */
1053    evergreen_set_tex_sampler   (pScrn, &tex_samp);
1054
1055    return TRUE;
1056}
1057
1058static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
1059				    PicturePtr pMaskPicture,
1060				    PicturePtr pDstPicture)
1061{
1062    uint32_t tmp1;
1063    PixmapPtr pSrcPixmap, pDstPixmap;
1064
1065    /* Check for unsupported compositing operations. */
1066    if (op >= (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0])))
1067	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1068
1069    if (pSrcPicture->pDrawable) {
1070	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1071
1072	if (pSrcPixmap->drawable.width >= 16384 ||
1073	    pSrcPixmap->drawable.height >= 16384) {
1074	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1075			     pSrcPixmap->drawable.width,
1076			     pSrcPixmap->drawable.height));
1077	}
1078
1079	if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1080	    return FALSE;
1081    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1082	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1083
1084    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1085
1086    if (pDstPixmap->drawable.width >= 16384 ||
1087	pDstPixmap->drawable.height >= 16384) {
1088	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1089			 pDstPixmap->drawable.width,
1090			 pDstPixmap->drawable.height));
1091    }
1092
1093    if (pMaskPicture) {
1094	PixmapPtr pMaskPixmap;
1095
1096	if (pMaskPicture->pDrawable) {
1097	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1098
1099	    if (pMaskPixmap->drawable.width >= 16384 ||
1100		pMaskPixmap->drawable.height >= 16384) {
1101	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1102			       pMaskPixmap->drawable.width,
1103			       pMaskPixmap->drawable.height));
1104	    }
1105
1106	    if (pMaskPicture->componentAlpha) {
1107		/* Check if it's component alpha that relies on a source alpha and
1108		 * on the source value.  We can only get one of those into the
1109		 * single source value that we get to blend with.
1110		 *
1111		 * We can cheat a bit if the src is solid, though. PictOpOver
1112		 * can use the constant blend color to sneak a second blend
1113		 * source in.
1114		 */
1115		if (EVERGREENBlendOp[op].src_alpha &&
1116		    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1117		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1118		    if (pSrcPicture->pDrawable || op != PictOpOver)
1119			RADEON_FALLBACK(("Component alpha not supported with source "
1120					 "alpha and source value blending.\n"));
1121		}
1122	    }
1123
1124	    if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1125		return FALSE;
1126	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1127	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1128    }
1129
1130    if (!EVERGREENGetDestFormat(pDstPicture, &tmp1))
1131	return FALSE;
1132
1133    return TRUE;
1134
1135}
1136
1137static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
1138{
1139    RADEONInfoPtr info = RADEONPTR(pScrn);
1140    struct radeon_accel_state *accel_state = info->accel_state;
1141    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
1142
1143    uint32_t w = (fg >> 24) & 0xff;
1144    uint32_t z = (fg >> 16) & 0xff;
1145    uint32_t y = (fg >> 8) & 0xff;
1146    uint32_t x = (fg >> 0) & 0xff;
1147    float xf = (float)x / 255; /* R */
1148    float yf = (float)y / 255; /* G */
1149    float zf = (float)z / 255; /* B */
1150    float wf = (float)w / 255; /* A */
1151
1152    /* component swizzles */
1153    switch (format) {
1154	case PICT_a1r5g5b5:
1155	case PICT_a8r8g8b8:
1156	    pix_r = zf; /* R */
1157	    pix_g = yf; /* G */
1158	    pix_b = xf; /* B */
1159	    pix_a = wf; /* A */
1160	    break;
1161	case PICT_a8b8g8r8:
1162	    pix_r = xf; /* R */
1163	    pix_g = yf; /* G */
1164	    pix_b = zf; /* B */
1165	    pix_a = wf; /* A */
1166	    break;
1167	case PICT_x8b8g8r8:
1168	    pix_r = xf; /* R */
1169	    pix_g = yf; /* G */
1170	    pix_b = zf; /* B */
1171	    pix_a = 1.0; /* A */
1172	    break;
1173	case PICT_b8g8r8a8:
1174	    pix_r = yf; /* R */
1175	    pix_g = zf; /* G */
1176	    pix_b = wf; /* B */
1177	    pix_a = xf; /* A */
1178	    break;
1179	case PICT_b8g8r8x8:
1180	    pix_r = yf; /* R */
1181	    pix_g = zf; /* G */
1182	    pix_b = wf; /* B */
1183	    pix_a = 1.0; /* A */
1184	    break;
1185	case PICT_x1r5g5b5:
1186	case PICT_x8r8g8b8:
1187	case PICT_r5g6b5:
1188	    pix_r = zf; /* R */
1189	    pix_g = yf; /* G */
1190	    pix_b = xf; /* B */
1191	    pix_a = 1.0; /* A */
1192	    break;
1193	case PICT_a8:
1194	    pix_r = 0.0; /* R */
1195	    pix_g = 0.0; /* G */
1196	    pix_b = 0.0; /* B */
1197	    pix_a = xf; /* A */
1198	    break;
1199	default:
1200	    ErrorF("Bad format 0x%x\n", format);
1201    }
1202
1203    if (unit == 0) {
1204	if (!accel_state->msk_pic) {
1205	    if (PICT_FORMAT_RGB(format) == 0) {
1206		pix_r = 0.0;
1207		pix_g = 0.0;
1208		pix_b = 0.0;
1209	    }
1210
1211	    if (PICT_FORMAT_A(format) == 0)
1212		pix_a = 1.0;
1213	} else {
1214	    if (accel_state->component_alpha) {
1215		if (accel_state->src_alpha) {
1216		    /* required for PictOpOver */
1217		    float cblend[4] = { pix_r / pix_a, pix_g / pix_a,
1218					pix_b / pix_a, pix_a / pix_a };
1219		    evergreen_set_blend_color(pScrn, cblend);
1220
1221		    if (PICT_FORMAT_A(format) == 0) {
1222			pix_r = 1.0;
1223			pix_g = 1.0;
1224			pix_b = 1.0;
1225			pix_a = 1.0;
1226		    } else {
1227			pix_r = pix_a;
1228			pix_g = pix_a;
1229			pix_b = pix_a;
1230		    }
1231		} else {
1232		    if (PICT_FORMAT_A(format) == 0)
1233			pix_a = 1.0;
1234		}
1235	    } else {
1236		if (PICT_FORMAT_RGB(format) == 0) {
1237		    pix_r = 0;
1238		    pix_g = 0;
1239		    pix_b = 0;
1240		}
1241
1242		if (PICT_FORMAT_A(format) == 0)
1243		    pix_a = 1.0;
1244	    }
1245	}
1246    } else {
1247	if (accel_state->component_alpha) {
1248	    if (PICT_FORMAT_A(format) == 0)
1249		pix_a = 1.0;
1250	} else {
1251	    if (PICT_FORMAT_A(format) == 0) {
1252		pix_r = 1.0;
1253		pix_g = 1.0;
1254		pix_b = 1.0;
1255		pix_a = 1.0;
1256	    } else {
1257		pix_r = pix_a;
1258		pix_g = pix_a;
1259		pix_b = pix_a;
1260	    }
1261	}
1262    }
1263
1264    buf[0] = pix_r;
1265    buf[1] = pix_g;
1266    buf[2] = pix_b;
1267    buf[3] = pix_a;
1268}
1269
1270static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
1271				      PicturePtr pMaskPicture, PicturePtr pDstPicture,
1272				      PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1273{
1274    ScreenPtr pScreen = pDst->drawable.pScreen;
1275    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1276    RADEONInfoPtr info = RADEONPTR(pScrn);
1277    struct radeon_accel_state *accel_state = info->accel_state;
1278    uint32_t dst_format;
1279    cb_config_t cb_conf;
1280    shader_config_t vs_conf, ps_conf;
1281    const_config_t vs_const_conf;
1282    struct r600_accel_object src_obj, mask_obj, dst_obj;
1283    float *cbuf;
1284    uint32_t ps_bool_consts = 0;
1285
1286    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1287	return FALSE;
1288
1289    if (pSrc) {
1290	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1291	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1292	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1293	src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1294	src_obj.width = pSrc->drawable.width;
1295	src_obj.height = pSrc->drawable.height;
1296	src_obj.bpp = pSrc->drawable.bitsPerPixel;
1297	src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1298    }
1299
1300    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1301    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1302    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1303    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1304    dst_obj.width = pDst->drawable.width;
1305    dst_obj.height = pDst->drawable.height;
1306    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1307    if (radeon_get_pixmap_shared(pDst) == TRUE)
1308	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1309    else
1310	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1311
1312    if (pMaskPicture) {
1313	if (pMask) {
1314	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1315	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1316	    mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1317	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1318	    mask_obj.width = pMask->drawable.width;
1319	    mask_obj.height = pMask->drawable.height;
1320	    mask_obj.bpp = pMask->drawable.bitsPerPixel;
1321	    mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1322	}
1323
1324	accel_state->msk_pic = pMaskPicture;
1325	if (pMaskPicture->componentAlpha) {
1326	    accel_state->component_alpha = TRUE;
1327	    if (EVERGREENBlendOp[op].src_alpha)
1328		accel_state->src_alpha = TRUE;
1329	    else
1330		accel_state->src_alpha = FALSE;
1331	} else {
1332	    accel_state->component_alpha = FALSE;
1333	    accel_state->src_alpha = FALSE;
1334	}
1335    } else {
1336	accel_state->msk_pic = NULL;
1337	accel_state->component_alpha = FALSE;
1338	accel_state->src_alpha = FALSE;
1339    }
1340
1341    if (!R600SetAccelState(pScrn,
1342		pSrc ? &src_obj : NULL,
1343		(pMaskPicture && pMask) ? &mask_obj : NULL,
1344		&dst_obj,
1345		accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1346		3, 0xffffffff))
1347	return FALSE;
1348
1349    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
1350	return FALSE;
1351
1352    CLEAR (cb_conf);
1353    CLEAR (vs_conf);
1354    CLEAR (ps_conf);
1355    CLEAR (vs_const_conf);
1356
1357    if (pMask)
1358        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1359    else
1360        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1361
1362    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
1363
1364    radeon_cp_start(pScrn);
1365
1366    evergreen_set_default_state(pScrn);
1367
1368    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1369    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1370    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1371
1372    if (pSrc) {
1373	if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
1374	    radeon_ib_discard(pScrn);
1375	    radeon_cs_flush_indirect(pScrn);
1376	    return FALSE;
1377	}
1378    } else
1379	accel_state->is_transform[0] = FALSE;
1380
1381    if (pMask) {
1382        if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
1383	    radeon_ib_discard(pScrn);
1384	    radeon_cs_flush_indirect(pScrn);
1385            return FALSE;
1386        }
1387    } else
1388        accel_state->is_transform[1] = FALSE;
1389
1390    if (pSrc)
1391	ps_bool_consts |= (1 << 0);
1392    if (pMask)
1393	ps_bool_consts |= (1 << 1);
1394    evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
1395
1396    if (pMask) {
1397	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1398    } else {
1399	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1400    }
1401
1402    /* Shader */
1403    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1404    vs_conf.shader_size         = accel_state->vs_size;
1405    vs_conf.num_gprs            = 5;
1406    vs_conf.stack_size          = 1;
1407    vs_conf.bo                  = accel_state->shaders_bo;
1408    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1409
1410    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1411    ps_conf.shader_size         = accel_state->ps_size;
1412    ps_conf.num_gprs            = 2;
1413    ps_conf.stack_size          = 1;
1414    ps_conf.clamp_consts        = 0;
1415    ps_conf.export_mode         = 2;
1416    ps_conf.bo                  = accel_state->shaders_bo;
1417    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1418
1419    cb_conf.id = 0;
1420    cb_conf.w = accel_state->dst_obj.pitch;
1421    cb_conf.h = accel_state->dst_obj.height;
1422    cb_conf.base = 0;
1423    cb_conf.format = dst_format;
1424    cb_conf.bo = accel_state->dst_obj.bo;
1425    cb_conf.surface = accel_state->dst_obj.surface;
1426
1427    switch (pDstPicture->format) {
1428    case PICT_a8r8g8b8:
1429    case PICT_x8r8g8b8:
1430    case PICT_a1r5g5b5:
1431    case PICT_x1r5g5b5:
1432    default:
1433	cb_conf.comp_swap = 1; /* ARGB */
1434	break;
1435    case PICT_a8b8g8r8:
1436    case PICT_x8b8g8r8:
1437	cb_conf.comp_swap = 0; /* ABGR */
1438	break;
1439    case PICT_b8g8r8a8:
1440    case PICT_b8g8r8x8:
1441	cb_conf.comp_swap = 3; /* BGRA */
1442	break;
1443    case PICT_r5g6b5:
1444	cb_conf.comp_swap = 2; /* RGB */
1445	break;
1446    case PICT_a8:
1447	cb_conf.comp_swap = 3; /* A */
1448	break;
1449    }
1450    cb_conf.source_format = EXPORT_4C_16BPC;
1451    cb_conf.blend_clamp = 1;
1452    cb_conf.blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1453    cb_conf.blendcntl |= CB_BLEND0_CONTROL__ENABLE_bit;
1454    cb_conf.rop = 3;
1455    cb_conf.pmask = 0xf;
1456    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
1457	RADEON_TILING_LINEAR) {
1458	cb_conf.array_mode = 0;
1459	cb_conf.non_disp_tiling = 1;
1460    }
1461#if X_BYTE_ORDER == X_BIG_ENDIAN
1462    switch (dst_obj.bpp) {
1463    case 16:
1464	cb_conf.endian = ENDIAN_8IN16;
1465	break;
1466    case 32:
1467	cb_conf.endian = ENDIAN_8IN32;
1468	break;
1469    default:
1470	break;
1471    }
1472#endif
1473    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1474
1475    if (pMask)
1476	evergreen_set_spi(pScrn, (2 - 1), 2);
1477    else
1478	evergreen_set_spi(pScrn, (1 - 1), 1);
1479
1480    /* VS alu constants */
1481    vs_const_conf.size_bytes = 256;
1482    vs_const_conf.type = SHADER_TYPE_VS;
1483    cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
1484    vs_const_conf.bo = accel_state->cbuf.vb_bo;
1485    vs_const_conf.const_addr = accel_state->cbuf.vb_offset;
1486
1487    vs_const_conf.cpu_ptr = (uint32_t *)(char *)cbuf;
1488    EVERGREENXFormSetup(pSrcPicture, pScrn, 0, cbuf);
1489    if (pMask)
1490        EVERGREENXFormSetup(pMaskPicture, pScrn, 1, cbuf);
1491
1492    if (!pSrc) {
1493	/* solid src color */
1494	EVERGREENSetSolidConsts(pScrn, &cbuf[16], pSrcPicture->format,
1495		pSrcPicture->pSourcePict->solidFill.color, 0);
1496    }
1497
1498    if (!pMaskPicture) {
1499	/* use identity constant if there is no mask */
1500	cbuf[20] = 1.0;
1501	cbuf[21] = 1.0;
1502	cbuf[22] = 1.0;
1503	cbuf[23] = 1.0;
1504    } else if (!pMask) {
1505	/* solid mask color */
1506	EVERGREENSetSolidConsts(pScrn, &cbuf[20], pMaskPicture->format,
1507		pMaskPicture->pSourcePict->solidFill.color, 1);
1508    }
1509
1510    radeon_vbo_commit(pScrn, &accel_state->cbuf);
1511    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
1512
1513    if (accel_state->vsync)
1514	RADEONVlineHelperClear(pScrn);
1515
1516    accel_state->composite_op = op;
1517    accel_state->dst_pic = pDstPicture;
1518    accel_state->src_pic = pSrcPicture;
1519    accel_state->dst_pix = pDst;
1520    accel_state->msk_pix = pMask;
1521    accel_state->src_pix = pSrc;
1522
1523    return TRUE;
1524}
1525
1526static void EVERGREENFinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1527				     struct radeon_accel_state *accel_state)
1528{
1529    int vtx_size;
1530
1531    if (accel_state->vsync)
1532       evergreen_cp_wait_vline_sync(pScrn, pDst,
1533				    accel_state->vline_crtc,
1534				    accel_state->vline_y1,
1535				    accel_state->vline_y2);
1536
1537    vtx_size = accel_state->msk_pix ? 24 : 16;
1538
1539    evergreen_finish_op(pScrn, vtx_size);
1540}
1541
1542static void EVERGREENDoneComposite(PixmapPtr pDst)
1543{
1544    ScreenPtr pScreen = pDst->drawable.pScreen;
1545    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1546    RADEONInfoPtr info = RADEONPTR(pScrn);
1547    struct radeon_accel_state *accel_state = info->accel_state;
1548
1549    EVERGREENFinishComposite(pScrn, pDst, accel_state);
1550}
1551
1552static void EVERGREENComposite(PixmapPtr pDst,
1553			       int srcX, int srcY,
1554			       int maskX, int maskY,
1555			       int dstX, int dstY,
1556			       int w, int h)
1557{
1558    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1559    RADEONInfoPtr info = RADEONPTR(pScrn);
1560    struct radeon_accel_state *accel_state = info->accel_state;
1561    float *vb;
1562
1563    if (CS_FULL(info->cs)) {
1564	EVERGREENFinishComposite(pScrn, pDst, info->accel_state);
1565	radeon_cs_flush_indirect(pScrn);
1566	EVERGREENPrepareComposite(info->accel_state->composite_op,
1567				  info->accel_state->src_pic,
1568				  info->accel_state->msk_pic,
1569				  info->accel_state->dst_pic,
1570				  info->accel_state->src_pix,
1571				  info->accel_state->msk_pix,
1572				  info->accel_state->dst_pix);
1573    }
1574
1575    if (accel_state->vsync)
1576	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1577
1578    if (accel_state->msk_pix) {
1579
1580	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1581
1582	vb[0] = (float)dstX;
1583	vb[1] = (float)dstY;
1584	vb[2] = (float)srcX;
1585	vb[3] = (float)srcY;
1586	vb[4] = (float)maskX;
1587	vb[5] = (float)maskY;
1588
1589	vb[6] = (float)dstX;
1590	vb[7] = (float)(dstY + h);
1591	vb[8] = (float)srcX;
1592	vb[9] = (float)(srcY + h);
1593	vb[10] = (float)maskX;
1594	vb[11] = (float)(maskY + h);
1595
1596	vb[12] = (float)(dstX + w);
1597	vb[13] = (float)(dstY + h);
1598	vb[14] = (float)(srcX + w);
1599	vb[15] = (float)(srcY + h);
1600	vb[16] = (float)(maskX + w);
1601	vb[17] = (float)(maskY + h);
1602
1603	radeon_vbo_commit(pScrn, &accel_state->vbo);
1604
1605    } else {
1606
1607	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1608
1609	vb[0] = (float)dstX;
1610	vb[1] = (float)dstY;
1611	vb[2] = (float)srcX;
1612	vb[3] = (float)srcY;
1613
1614	vb[4] = (float)dstX;
1615	vb[5] = (float)(dstY + h);
1616	vb[6] = (float)srcX;
1617	vb[7] = (float)(srcY + h);
1618
1619	vb[8] = (float)(dstX + w);
1620	vb[9] = (float)(dstY + h);
1621	vb[10] = (float)(srcX + w);
1622	vb[11] = (float)(srcY + h);
1623
1624	radeon_vbo_commit(pScrn, &accel_state->vbo);
1625    }
1626
1627
1628}
1629
1630static Bool
1631EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1632			char *src, int src_pitch)
1633{
1634    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1635    RADEONInfoPtr info = RADEONPTR(pScrn);
1636    struct radeon_accel_state *accel_state = info->accel_state;
1637    struct radeon_exa_pixmap_priv *driver_priv;
1638    struct radeon_bo *scratch = NULL;
1639    struct radeon_bo *copy_dst;
1640    unsigned char *dst;
1641    unsigned size;
1642    uint32_t dst_domain;
1643    int bpp = pDst->drawable.bitsPerPixel;
1644    uint32_t scratch_pitch;
1645    uint32_t copy_pitch;
1646    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1647    int ret;
1648    Bool flush = TRUE;
1649    Bool r;
1650    int i;
1651    struct r600_accel_object src_obj, dst_obj;
1652    uint32_t height, base_align;
1653
1654    if (bpp < 8)
1655	return FALSE;
1656
1657    driver_priv = exaGetPixmapDriverPrivate(pDst);
1658    if (!driver_priv || !driver_priv->bo)
1659	return FALSE;
1660
1661    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
1662    copy_dst = driver_priv->bo;
1663    copy_pitch = pDst->devKind;
1664    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1665	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1666	    flush = FALSE;
1667	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain) &&
1668		!(dst_domain & RADEON_GEM_DOMAIN_VRAM))
1669		goto copy;
1670	}
1671    }
1672
1673    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1674    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1675    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1676    size = scratch_pitch * height * (bpp / 8);
1677    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1678    if (scratch == NULL) {
1679	goto copy;
1680    }
1681
1682    src_obj.pitch = scratch_pitch;
1683    src_obj.width = w;
1684    src_obj.height = h;
1685    src_obj.bpp = bpp;
1686    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1687    src_obj.bo = scratch;
1688    src_obj.tiling_flags = 0;
1689    src_obj.surface = NULL;
1690
1691    dst_obj.pitch = dst_pitch_hw;
1692    dst_obj.width = pDst->drawable.width;
1693    dst_obj.height = pDst->drawable.height;
1694    dst_obj.bpp = bpp;
1695    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1696    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1697    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1698    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1699
1700    if (!R600SetAccelState(pScrn,
1701			   &src_obj,
1702			   NULL,
1703			   &dst_obj,
1704			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1705			   3, 0xffffffff)) {
1706        goto copy;
1707    }
1708    copy_dst = scratch;
1709    copy_pitch = scratch_pitch * (bpp / 8);
1710    flush = FALSE;
1711
1712copy:
1713    if (flush)
1714	radeon_cs_flush_indirect(pScrn);
1715
1716    ret = radeon_bo_map(copy_dst, 0);
1717    if (ret) {
1718        r = FALSE;
1719        goto out;
1720    }
1721    r = TRUE;
1722    size = w * bpp / 8;
1723    dst = copy_dst->ptr;
1724    if (copy_dst == driver_priv->bo)
1725	dst += y * copy_pitch + x * bpp / 8;
1726    for (i = 0; i < h; i++) {
1727	memcpy(dst + i * copy_pitch, src, size);
1728        src += src_pitch;
1729    }
1730    radeon_bo_unmap(copy_dst);
1731
1732    if (copy_dst == scratch) {
1733	if (info->accel_state->vsync)
1734	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1735
1736	/* blit from gart to vram */
1737	EVERGREENDoPrepareCopy(pScrn);
1738	EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1739	EVERGREENDoCopyVline(pDst);
1740    }
1741
1742out:
1743    if (scratch)
1744	radeon_bo_unref(scratch);
1745    return r;
1746}
1747
1748static Bool
1749EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
1750			    int h, char *dst, int dst_pitch)
1751{
1752    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1753    RADEONInfoPtr info = RADEONPTR(pScrn);
1754    struct radeon_accel_state *accel_state = info->accel_state;
1755    struct radeon_exa_pixmap_priv *driver_priv;
1756    struct radeon_bo *scratch = NULL;
1757    struct radeon_bo *copy_src;
1758    unsigned size;
1759    uint32_t src_domain = 0;
1760    int bpp = pSrc->drawable.bitsPerPixel;
1761    uint32_t scratch_pitch;
1762    uint32_t copy_pitch;
1763    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1764    int ret;
1765    Bool flush = FALSE;
1766    Bool r;
1767    struct r600_accel_object src_obj, dst_obj;
1768    uint32_t height, base_align;
1769
1770    if (bpp < 8)
1771	return FALSE;
1772
1773    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1774    if (!driver_priv || !driver_priv->bo)
1775	return FALSE;
1776
1777    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1778    copy_src = driver_priv->bo;
1779    copy_pitch = pSrc->devKind;
1780    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1781	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1782	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1783	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1784		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1785		src_domain = 0;
1786	    else /* A write may be scheduled */
1787		flush = TRUE;
1788	}
1789
1790	if (!src_domain)
1791	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1792
1793	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1794	    goto copy;
1795
1796    }
1797
1798    if (!accel_state->allowHWDFS)
1799	goto copy;
1800
1801    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1802    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1803    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1804    size = scratch_pitch * height * (bpp / 8);
1805    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1806    if (scratch == NULL) {
1807	goto copy;
1808    }
1809    radeon_cs_space_reset_bos(info->cs);
1810    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1811				      RADEON_GEM_DOMAIN_VRAM, 0);
1812    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1813    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1814    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1815    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1816    ret = radeon_cs_space_check(info->cs);
1817    if (ret) {
1818	goto copy;
1819    }
1820
1821    src_obj.pitch = src_pitch_hw;
1822    src_obj.width = pSrc->drawable.width;
1823    src_obj.height = pSrc->drawable.height;
1824    src_obj.bpp = bpp;
1825    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1826    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1827    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1828    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1829
1830    dst_obj.pitch = scratch_pitch;
1831    dst_obj.width = w;
1832    dst_obj.height = h;
1833    dst_obj.bo = scratch;
1834    dst_obj.bpp = bpp;
1835    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1836    dst_obj.tiling_flags = 0;
1837    dst_obj.surface = NULL;
1838
1839    if (!R600SetAccelState(pScrn,
1840			   &src_obj,
1841			   NULL,
1842			   &dst_obj,
1843			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1844			   3, 0xffffffff)) {
1845	goto copy;
1846    }
1847
1848    /* blit from vram to gart */
1849    EVERGREENDoPrepareCopy(pScrn);
1850    EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1851    EVERGREENDoCopy(pScrn);
1852    copy_src = scratch;
1853    copy_pitch = scratch_pitch * (bpp / 8);
1854    flush = TRUE;
1855
1856copy:
1857    if (flush)
1858	radeon_cs_flush_indirect(pScrn);
1859
1860    ret = radeon_bo_map(copy_src, 0);
1861    if (ret) {
1862	ErrorF("failed to map pixmap: %d\n", ret);
1863        r = FALSE;
1864        goto out;
1865    }
1866    r = TRUE;
1867    w *= bpp / 8;
1868    if (copy_src == driver_priv->bo)
1869	size = y * copy_pitch + x * bpp / 8;
1870    else
1871	size = 0;
1872    while (h--) {
1873	memcpy(dst, copy_src->ptr + size, w);
1874	size += copy_pitch;
1875        dst += dst_pitch;
1876    }
1877    radeon_bo_unmap(copy_src);
1878out:
1879    if (scratch)
1880	radeon_bo_unref(scratch);
1881    return r;
1882}
1883
1884static int
1885EVERGREENMarkSync(ScreenPtr pScreen)
1886{
1887    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1888    RADEONInfoPtr info = RADEONPTR(pScrn);
1889    struct radeon_accel_state *accel_state = info->accel_state;
1890
1891    return ++accel_state->exaSyncMarker;
1892
1893}
1894
1895static void
1896EVERGREENSync(ScreenPtr pScreen, int marker)
1897{
1898    return;
1899}
1900
1901static Bool
1902EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1903{
1904    RADEONInfoPtr info = RADEONPTR(pScrn);
1905    struct radeon_accel_state *accel_state = info->accel_state;
1906
1907    /* 512 bytes per shader for now */
1908    int size = 512 * 9;
1909
1910    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
1911					     RADEON_GEM_DOMAIN_VRAM, 0);
1912    if (accel_state->shaders_bo == NULL) {
1913	ErrorF("Allocating shader failed\n");
1914	return FALSE;
1915    }
1916    return TRUE;
1917}
1918
1919static Bool
1920EVERGREENLoadShaders(ScrnInfoPtr pScrn)
1921{
1922    RADEONInfoPtr info = RADEONPTR(pScrn);
1923    struct radeon_accel_state *accel_state = info->accel_state;
1924    RADEONChipFamily ChipSet = info->ChipFamily;
1925    uint32_t *shader;
1926    int ret;
1927
1928    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1929    if (ret) {
1930	FatalError("failed to map shader %d\n", ret);
1931	return FALSE;
1932    }
1933    shader = accel_state->shaders_bo->ptr;
1934
1935    /*  solid vs --------------------------------------- */
1936    accel_state->solid_vs_offset = 0;
1937    evergreen_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1938
1939    /*  solid ps --------------------------------------- */
1940    accel_state->solid_ps_offset = 512;
1941    evergreen_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1942
1943    /*  copy vs --------------------------------------- */
1944    accel_state->copy_vs_offset = 1024;
1945    evergreen_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1946
1947    /*  copy ps --------------------------------------- */
1948    accel_state->copy_ps_offset = 1536;
1949    evergreen_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
1950
1951    /*  comp vs --------------------------------------- */
1952    accel_state->comp_vs_offset = 2048;
1953    evergreen_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
1954
1955    /*  comp ps --------------------------------------- */
1956    accel_state->comp_ps_offset = 2560;
1957    evergreen_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
1958
1959    /*  xv vs --------------------------------------- */
1960    accel_state->xv_vs_offset = 3072;
1961    evergreen_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
1962
1963    /*  xv ps --------------------------------------- */
1964    accel_state->xv_ps_offset = 3584;
1965    evergreen_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
1966
1967    radeon_bo_unmap(accel_state->shaders_bo);
1968
1969    return TRUE;
1970}
1971
1972static Bool
1973CAYMANLoadShaders(ScrnInfoPtr pScrn)
1974{
1975    RADEONInfoPtr info = RADEONPTR(pScrn);
1976    struct radeon_accel_state *accel_state = info->accel_state;
1977    RADEONChipFamily ChipSet = info->ChipFamily;
1978    uint32_t *shader;
1979    int ret;
1980
1981    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1982    if (ret) {
1983	FatalError("failed to map shader %d\n", ret);
1984	return FALSE;
1985    }
1986    shader = accel_state->shaders_bo->ptr;
1987
1988    /*  solid vs --------------------------------------- */
1989    accel_state->solid_vs_offset = 0;
1990    cayman_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1991
1992    /*  solid ps --------------------------------------- */
1993    accel_state->solid_ps_offset = 512;
1994    cayman_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1995
1996    /*  copy vs --------------------------------------- */
1997    accel_state->copy_vs_offset = 1024;
1998    cayman_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1999
2000    /*  copy ps --------------------------------------- */
2001    accel_state->copy_ps_offset = 1536;
2002    cayman_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2003
2004    /*  comp vs --------------------------------------- */
2005    accel_state->comp_vs_offset = 2048;
2006    cayman_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2007
2008    /*  comp ps --------------------------------------- */
2009    accel_state->comp_ps_offset = 2560;
2010    cayman_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2011
2012    /*  xv vs --------------------------------------- */
2013    accel_state->xv_vs_offset = 3072;
2014    cayman_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2015
2016    /*  xv ps --------------------------------------- */
2017    accel_state->xv_ps_offset = 3584;
2018    cayman_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2019
2020    radeon_bo_unmap(accel_state->shaders_bo);
2021
2022    return TRUE;
2023}
2024
2025Bool
2026EVERGREENDrawInit(ScreenPtr pScreen)
2027{
2028    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2029    RADEONInfoPtr info   = RADEONPTR(pScrn);
2030
2031    if (info->accel_state->exa == NULL) {
2032	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2033	return FALSE;
2034    }
2035
2036    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2037    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2038
2039    info->accel_state->exa->PrepareSolid = EVERGREENPrepareSolid;
2040    info->accel_state->exa->Solid = EVERGREENSolid;
2041    info->accel_state->exa->DoneSolid = EVERGREENDoneSolid;
2042
2043    info->accel_state->exa->PrepareCopy = EVERGREENPrepareCopy;
2044    info->accel_state->exa->Copy = EVERGREENCopy;
2045    info->accel_state->exa->DoneCopy = EVERGREENDoneCopy;
2046
2047    info->accel_state->exa->MarkSync = EVERGREENMarkSync;
2048    info->accel_state->exa->WaitMarker = EVERGREENSync;
2049
2050    info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2051    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2052    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2053    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2054    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2055    info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
2056    info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
2057    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2058#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 6)
2059    info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
2060    info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
2061#endif
2062    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
2063	EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
2064    info->accel_state->exa->pixmapOffsetAlign = 256;
2065    info->accel_state->exa->pixmapPitchAlign = 256;
2066
2067    info->accel_state->exa->CheckComposite = EVERGREENCheckComposite;
2068    info->accel_state->exa->PrepareComposite = EVERGREENPrepareComposite;
2069    info->accel_state->exa->Composite = EVERGREENComposite;
2070    info->accel_state->exa->DoneComposite = EVERGREENDoneComposite;
2071
2072    info->accel_state->exa->maxPitchBytes = 32768;
2073    info->accel_state->exa->maxX = 8192;
2074    info->accel_state->exa->maxY = 8192;
2075
2076    /* not supported yet */
2077    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2078	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2079	info->accel_state->vsync = TRUE;
2080    } else
2081	info->accel_state->vsync = FALSE;
2082
2083    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2084	free(info->accel_state->exa);
2085	return FALSE;
2086    }
2087
2088    info->accel_state->XInited3D = FALSE;
2089    info->accel_state->copy_area = NULL;
2090    info->accel_state->src_obj[0].bo = NULL;
2091    info->accel_state->src_obj[1].bo = NULL;
2092    info->accel_state->dst_obj.bo = NULL;
2093    info->accel_state->copy_area_bo = NULL;
2094    info->accel_state->vbo.vb_start_op = -1;
2095    info->accel_state->cbuf.vb_start_op = -1;
2096    info->accel_state->finish_op = evergreen_finish_op;
2097    info->accel_state->vbo.verts_per_op = 3;
2098    info->accel_state->cbuf.verts_per_op = 1;
2099    RADEONVlineHelperClear(pScrn);
2100
2101    radeon_vbo_init_lists(pScrn);
2102
2103    if (!EVERGREENAllocShaders(pScrn, pScreen))
2104	return FALSE;
2105
2106    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
2107	if (!CAYMANLoadShaders(pScrn))
2108	    return FALSE;
2109    } else {
2110	if (!EVERGREENLoadShaders(pScrn))
2111	    return FALSE;
2112    }
2113
2114    exaMarkSync(pScreen);
2115
2116    return TRUE;
2117
2118}
2119