1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_reg.h"
37#include "r600_shader.h"
38#include "r600_reg.h"
39#include "r600_state.h"
40#include "radeon_exa_shared.h"
41#include "radeon_vbo.h"
42
43/* #define SHOW_VERTEXES */
44
45Bool
46R600SetAccelState(ScrnInfoPtr pScrn,
47		  struct r600_accel_object *src0,
48		  struct r600_accel_object *src1,
49		  struct r600_accel_object *dst,
50		  uint32_t vs_offset, uint32_t ps_offset,
51		  int rop, Pixel planemask)
52{
53    RADEONInfoPtr info = RADEONPTR(pScrn);
54    struct radeon_accel_state *accel_state = info->accel_state;
55    uint32_t pitch_align = 0x7;
56    int ret;
57
58    if (src0) {
59	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
60	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
61	if (src0->surface)
62		accel_state->src_size[0] = src0->surface->bo_size;
63
64	/* bad pitch */
65	if (accel_state->src_obj[0].pitch & pitch_align)
66	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
67
68    } else {
69	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
70	accel_state->src_size[0] = 0;
71    }
72
73    if (src1) {
74	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
75	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
76	if (src1->surface) {
77		accel_state->src_size[1] = src1->surface->bo_size;
78	}
79
80	/* bad pitch */
81	if (accel_state->src_obj[1].pitch & pitch_align)
82	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
83
84    } else {
85	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
86	accel_state->src_size[1] = 0;
87    }
88
89    if (dst) {
90	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
91	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
92	if (dst->surface) {
93		accel_state->dst_size = dst->surface->bo_size;
94	} else
95	{
96		accel_state->dst_obj.tiling_flags = 0;
97	}
98	if (accel_state->dst_obj.pitch & pitch_align)
99	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
100
101    } else {
102	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
103	accel_state->dst_size = 0;
104    }
105
106    if (CS_FULL(info->cs))
107	radeon_cs_flush_indirect(pScrn);
108
109    accel_state->rop = rop;
110    accel_state->planemask = planemask;
111
112    accel_state->vs_size = 512;
113    accel_state->ps_size = 512;
114    accel_state->vs_mc_addr = vs_offset;
115    accel_state->ps_mc_addr = ps_offset;
116
117    radeon_cs_space_reset_bos(info->cs);
118    radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
119				      RADEON_GEM_DOMAIN_VRAM, 0);
120    if (accel_state->src_obj[0].bo)
121	radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
122					  accel_state->src_obj[0].domain, 0);
123    if (accel_state->src_obj[1].bo)
124	radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
125					  accel_state->src_obj[1].domain, 0);
126    if (accel_state->dst_obj.bo)
127	radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
128					  0, accel_state->dst_obj.domain);
129    ret = radeon_cs_space_check(info->cs);
130    if (ret)
131	RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
132
133    return TRUE;
134}
135
136static Bool
137R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
138{
139    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
140    RADEONInfoPtr info = RADEONPTR(pScrn);
141    struct radeon_accel_state *accel_state = info->accel_state;
142    cb_config_t     cb_conf;
143    shader_config_t vs_conf, ps_conf;
144    uint32_t a, r, g, b;
145    float ps_alu_consts[4];
146    struct r600_accel_object dst;
147
148    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
149	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
150    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
151	RADEON_FALLBACK(("invalid planemask\n"));
152
153    dst.bo = radeon_get_pixmap_bo(pPix)->bo.radeon;
154    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
155    dst.surface = radeon_get_pixmap_surface(pPix);
156
157    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
158    dst.width = pPix->drawable.width;
159    dst.height = pPix->drawable.height;
160    dst.bpp = pPix->drawable.bitsPerPixel;
161    dst.domain = RADEON_GEM_DOMAIN_VRAM;
162
163    if (!R600SetAccelState(pScrn,
164			   NULL,
165			   NULL,
166			   &dst,
167			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
168			   alu, pm))
169	return FALSE;
170
171    CLEAR (cb_conf);
172    CLEAR (vs_conf);
173    CLEAR (ps_conf);
174
175    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
176    radeon_cp_start(pScrn);
177
178    r600_set_default_state(pScrn);
179
180    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
181    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
182    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
183
184    /* Shader */
185    vs_conf.shader_addr         = accel_state->vs_mc_addr;
186    vs_conf.shader_size         = accel_state->vs_size;
187    vs_conf.num_gprs            = 2;
188    vs_conf.stack_size          = 0;
189    vs_conf.bo                  = accel_state->shaders_bo;
190    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
191
192    ps_conf.shader_addr         = accel_state->ps_mc_addr;
193    ps_conf.shader_size         = accel_state->ps_size;
194    ps_conf.num_gprs            = 1;
195    ps_conf.stack_size          = 0;
196    ps_conf.uncached_first_inst = 1;
197    ps_conf.clamp_consts        = 0;
198    ps_conf.export_mode         = 2;
199    ps_conf.bo                  = accel_state->shaders_bo;
200    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
201
202    cb_conf.id = 0;
203    cb_conf.w = accel_state->dst_obj.pitch;
204    cb_conf.h = accel_state->dst_obj.height;
205    cb_conf.base = 0;
206    cb_conf.bo = accel_state->dst_obj.bo;
207    cb_conf.surface = accel_state->dst_obj.surface;
208
209    if (accel_state->dst_obj.bpp == 8) {
210	cb_conf.format = COLOR_8;
211	cb_conf.comp_swap = 3; /* A */
212    } else if (accel_state->dst_obj.bpp == 16) {
213	cb_conf.format = COLOR_5_6_5;
214	cb_conf.comp_swap = 2; /* RGB */
215#if X_BYTE_ORDER == X_BIG_ENDIAN
216	cb_conf.endian = ENDIAN_8IN16;
217#endif
218    } else {
219	cb_conf.format = COLOR_8_8_8_8;
220	cb_conf.comp_swap = 1; /* ARGB */
221#if X_BYTE_ORDER == X_BIG_ENDIAN
222	cb_conf.endian = ENDIAN_8IN32;
223#endif
224    }
225    cb_conf.source_format = 1;
226    cb_conf.blend_clamp = 1;
227    /* Render setup */
228    if (accel_state->planemask & 0x000000ff)
229	cb_conf.pmask |= 4; /* B */
230    if (accel_state->planemask & 0x0000ff00)
231	cb_conf.pmask |= 2; /* G */
232    if (accel_state->planemask & 0x00ff0000)
233	cb_conf.pmask |= 1; /* R */
234    if (accel_state->planemask & 0xff000000)
235	cb_conf.pmask |= 8; /* A */
236    cb_conf.rop = accel_state->rop;
237    if (accel_state->dst_obj.tiling_flags == 0)
238	cb_conf.array_mode = 0;
239    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
240
241    r600_set_spi(pScrn, 0, 0);
242
243    /* PS alu constants */
244    if (accel_state->dst_obj.bpp == 16) {
245	r = (fg >> 11) & 0x1f;
246	g = (fg >> 5) & 0x3f;
247	b = (fg >> 0) & 0x1f;
248	ps_alu_consts[0] = (float)r / 31; /* R */
249	ps_alu_consts[1] = (float)g / 63; /* G */
250	ps_alu_consts[2] = (float)b / 31; /* B */
251	ps_alu_consts[3] = 1.0; /* A */
252    } else if (accel_state->dst_obj.bpp == 8) {
253	a = (fg >> 0) & 0xff;
254	ps_alu_consts[0] = 0.0; /* R */
255	ps_alu_consts[1] = 0.0; /* G */
256	ps_alu_consts[2] = 0.0; /* B */
257	ps_alu_consts[3] = (float)a / 255; /* A */
258    } else {
259	a = (fg >> 24) & 0xff;
260	r = (fg >> 16) & 0xff;
261	g = (fg >> 8) & 0xff;
262	b = (fg >> 0) & 0xff;
263	ps_alu_consts[0] = (float)r / 255; /* R */
264	ps_alu_consts[1] = (float)g / 255; /* G */
265	ps_alu_consts[2] = (float)b / 255; /* B */
266	ps_alu_consts[3] = (float)a / 255; /* A */
267    }
268    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
269			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
270
271    if (accel_state->vsync)
272	RADEONVlineHelperClear(pScrn);
273
274    accel_state->dst_pix = pPix;
275    accel_state->fg = fg;
276
277    return TRUE;
278}
279
280static void
281R600DoneSolid(PixmapPtr pPix)
282{
283    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
284    RADEONInfoPtr info = RADEONPTR(pScrn);
285    struct radeon_accel_state *accel_state = info->accel_state;
286
287    if (accel_state->vsync)
288	r600_cp_wait_vline_sync(pScrn, pPix,
289				accel_state->vline_crtc,
290				accel_state->vline_y1,
291				accel_state->vline_y2);
292
293    r600_finish_op(pScrn, 8);
294}
295
296static void
297R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
298{
299    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
300    RADEONInfoPtr info = RADEONPTR(pScrn);
301    struct radeon_accel_state *accel_state = info->accel_state;
302    float *vb;
303
304    if (CS_FULL(info->cs)) {
305	R600DoneSolid(info->accel_state->dst_pix);
306	radeon_cs_flush_indirect(pScrn);
307	R600PrepareSolid(accel_state->dst_pix,
308			 accel_state->rop,
309			 accel_state->planemask,
310			 accel_state->fg);
311    }
312
313    if (accel_state->vsync)
314	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
315
316    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
317
318    vb[0] = (float)x1;
319    vb[1] = (float)y1;
320
321    vb[2] = (float)x1;
322    vb[3] = (float)y2;
323
324    vb[4] = (float)x2;
325    vb[5] = (float)y2;
326
327    radeon_vbo_commit(pScrn, &accel_state->vbo);
328}
329
330static void
331R600DoPrepareCopy(ScrnInfoPtr pScrn)
332{
333    RADEONInfoPtr info = RADEONPTR(pScrn);
334    struct radeon_accel_state *accel_state = info->accel_state;
335    cb_config_t     cb_conf;
336    tex_resource_t  tex_res;
337    tex_sampler_t   tex_samp;
338    shader_config_t vs_conf, ps_conf;
339
340    CLEAR (cb_conf);
341    CLEAR (tex_res);
342    CLEAR (tex_samp);
343    CLEAR (vs_conf);
344    CLEAR (ps_conf);
345
346    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
347    radeon_cp_start(pScrn);
348
349    r600_set_default_state(pScrn);
350
351    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
352    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
353    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
354
355    /* Shader */
356    vs_conf.shader_addr         = accel_state->vs_mc_addr;
357    vs_conf.shader_size         = accel_state->vs_size;
358    vs_conf.num_gprs            = 2;
359    vs_conf.stack_size          = 0;
360    vs_conf.bo                  = accel_state->shaders_bo;
361    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
362
363    ps_conf.shader_addr         = accel_state->ps_mc_addr;
364    ps_conf.shader_size         = accel_state->ps_size;
365    ps_conf.num_gprs            = 1;
366    ps_conf.stack_size          = 0;
367    ps_conf.uncached_first_inst = 1;
368    ps_conf.clamp_consts        = 0;
369    ps_conf.export_mode         = 2;
370    ps_conf.bo                  = accel_state->shaders_bo;
371    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
372
373    /* Texture */
374    tex_res.id                  = 0;
375    tex_res.w                   = accel_state->src_obj[0].width;
376    tex_res.h                   = accel_state->src_obj[0].height;
377    tex_res.pitch               = accel_state->src_obj[0].pitch;
378    tex_res.depth               = 0;
379    tex_res.dim                 = SQ_TEX_DIM_2D;
380    tex_res.base                = 0;
381    tex_res.mip_base            = 0;
382    tex_res.size                = accel_state->src_size[0];
383    tex_res.bo                  = accel_state->src_obj[0].bo;
384    tex_res.mip_bo              = accel_state->src_obj[0].bo;
385    tex_res.surface             = accel_state->src_obj[0].surface;
386    if (accel_state->src_obj[0].bpp == 8) {
387	tex_res.format              = FMT_8;
388	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
389	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
390	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
391	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
392    } else if (accel_state->src_obj[0].bpp == 16) {
393	tex_res.format              = FMT_5_6_5;
394	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
395	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
396	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
397	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
398    } else {
399	tex_res.format              = FMT_8_8_8_8;
400	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
401	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
402	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
403	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
404    }
405
406    tex_res.request_size        = 1;
407    tex_res.base_level          = 0;
408    tex_res.last_level          = 0;
409    tex_res.perf_modulation     = 0;
410    if (accel_state->src_obj[0].tiling_flags == 0)
411	tex_res.tile_mode           = 1;
412    r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
413
414    tex_samp.id                 = 0;
415    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
416    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
417    tex_samp.clamp_z            = SQ_TEX_WRAP;
418    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
419    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
420    tex_samp.mc_coord_truncate  = 1;
421    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
422    tex_samp.mip_filter         = 0;			/* no mipmap */
423    r600_set_tex_sampler(pScrn, &tex_samp);
424
425    cb_conf.id = 0;
426    cb_conf.w = accel_state->dst_obj.pitch;
427    cb_conf.h = accel_state->dst_obj.height;
428    cb_conf.base = 0;
429    cb_conf.bo = accel_state->dst_obj.bo;
430    cb_conf.surface = accel_state->dst_obj.surface;
431    if (accel_state->dst_obj.bpp == 8) {
432	cb_conf.format = COLOR_8;
433	cb_conf.comp_swap = 3; /* A */
434    } else if (accel_state->dst_obj.bpp == 16) {
435	cb_conf.format = COLOR_5_6_5;
436	cb_conf.comp_swap = 2; /* RGB */
437    } else {
438	cb_conf.format = COLOR_8_8_8_8;
439	cb_conf.comp_swap = 1; /* ARGB */
440    }
441    cb_conf.source_format = 1;
442    cb_conf.blend_clamp = 1;
443
444    /* Render setup */
445    if (accel_state->planemask & 0x000000ff)
446	cb_conf.pmask |= 4; /* B */
447    if (accel_state->planemask & 0x0000ff00)
448	cb_conf.pmask |= 2; /* G */
449    if (accel_state->planemask & 0x00ff0000)
450	cb_conf.pmask |= 1; /* R */
451    if (accel_state->planemask & 0xff000000)
452	cb_conf.pmask |= 8; /* A */
453    cb_conf.rop = accel_state->rop;
454    if (accel_state->dst_obj.tiling_flags == 0)
455	cb_conf.array_mode = 0;
456    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
457
458    r600_set_spi(pScrn, (1 - 1), 1);
459
460}
461
462static void
463R600DoCopy(ScrnInfoPtr pScrn)
464{
465    r600_finish_op(pScrn, 16);
466}
467
468static void
469R600DoCopyVline(PixmapPtr pPix)
470{
471    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
472    RADEONInfoPtr info = RADEONPTR(pScrn);
473    struct radeon_accel_state *accel_state = info->accel_state;
474
475    if (accel_state->vsync)
476	r600_cp_wait_vline_sync(pScrn, pPix,
477				accel_state->vline_crtc,
478				accel_state->vline_y1,
479				accel_state->vline_y2);
480
481    r600_finish_op(pScrn, 16);
482}
483
484static void
485R600AppendCopyVertex(ScrnInfoPtr pScrn,
486		     int srcX, int srcY,
487		     int dstX, int dstY,
488		     int w, int h)
489{
490    RADEONInfoPtr info = RADEONPTR(pScrn);
491    struct radeon_accel_state *accel_state = info->accel_state;
492    float *vb;
493
494    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
495
496    vb[0] = (float)dstX;
497    vb[1] = (float)dstY;
498    vb[2] = (float)srcX;
499    vb[3] = (float)srcY;
500
501    vb[4] = (float)dstX;
502    vb[5] = (float)(dstY + h);
503    vb[6] = (float)srcX;
504    vb[7] = (float)(srcY + h);
505
506    vb[8] = (float)(dstX + w);
507    vb[9] = (float)(dstY + h);
508    vb[10] = (float)(srcX + w);
509    vb[11] = (float)(srcY + h);
510
511    radeon_vbo_commit(pScrn, &accel_state->vbo);
512}
513
514static Bool
515R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
516		int xdir, int ydir,
517		int rop,
518		Pixel planemask)
519{
520    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
521    RADEONInfoPtr info = RADEONPTR(pScrn);
522    struct radeon_accel_state *accel_state = info->accel_state;
523    struct r600_accel_object src_obj, dst_obj;
524
525    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
526	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
527    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
528	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
529    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
530	RADEON_FALLBACK(("Invalid planemask\n"));
531
532    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
533    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
534
535    accel_state->same_surface = FALSE;
536
537    src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
538    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
539    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
540    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
541    src_obj.surface = radeon_get_pixmap_surface(pSrc);
542    dst_obj.surface = radeon_get_pixmap_surface(pDst);
543    if (src_obj.bo == dst_obj.bo)
544	accel_state->same_surface = TRUE;
545
546    src_obj.width = pSrc->drawable.width;
547    src_obj.height = pSrc->drawable.height;
548    src_obj.bpp = pSrc->drawable.bitsPerPixel;
549    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
550
551    dst_obj.width = pDst->drawable.width;
552    dst_obj.height = pDst->drawable.height;
553    dst_obj.bpp = pDst->drawable.bitsPerPixel;
554    if (radeon_get_pixmap_shared(pDst) == TRUE) {
555	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
556    } else
557	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
558
559    if (!R600SetAccelState(pScrn,
560			   &src_obj,
561			   NULL,
562			   &dst_obj,
563			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
564			   rop, planemask))
565	return FALSE;
566
567    if (accel_state->same_surface == TRUE) {
568	unsigned long size = accel_state->dst_obj.surface->bo_size;
569	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
570
571	if (accel_state->copy_area_bo) {
572	    radeon_bo_unref(accel_state->copy_area_bo);
573	    accel_state->copy_area_bo = NULL;
574	}
575	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
576						   RADEON_GEM_DOMAIN_VRAM,
577						   0);
578	if (!accel_state->copy_area_bo)
579	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
580
581	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
582					  0, RADEON_GEM_DOMAIN_VRAM);
583	if (radeon_cs_space_check(info->cs)) {
584	    radeon_bo_unref(accel_state->copy_area_bo);
585	    accel_state->copy_area_bo = NULL;
586	    return FALSE;
587	}
588	accel_state->copy_area = (void*)accel_state->copy_area_bo;
589    } else
590	R600DoPrepareCopy(pScrn);
591
592    if (accel_state->vsync)
593	RADEONVlineHelperClear(pScrn);
594
595    accel_state->dst_pix = pDst;
596    accel_state->src_pix = pSrc;
597    accel_state->xdir = xdir;
598    accel_state->ydir = ydir;
599
600    return TRUE;
601}
602
603static void
604R600DoneCopy(PixmapPtr pDst)
605{
606    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
607    RADEONInfoPtr info = RADEONPTR(pScrn);
608    struct radeon_accel_state *accel_state = info->accel_state;
609
610    if (!accel_state->same_surface)
611	R600DoCopyVline(pDst);
612
613    if (accel_state->copy_area) {
614	accel_state->copy_area = NULL;
615    }
616
617}
618
619static void
620R600Copy(PixmapPtr pDst,
621	 int srcX, int srcY,
622	 int dstX, int dstY,
623	 int w, int h)
624{
625    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
626    RADEONInfoPtr info = RADEONPTR(pScrn);
627    struct radeon_accel_state *accel_state = info->accel_state;
628
629    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
630	return;
631
632    if (CS_FULL(info->cs)) {
633	R600DoneCopy(info->accel_state->dst_pix);
634	radeon_cs_flush_indirect(pScrn);
635	R600PrepareCopy(accel_state->src_pix,
636			accel_state->dst_pix,
637			accel_state->xdir,
638			accel_state->ydir,
639			accel_state->rop,
640			accel_state->planemask);
641    }
642
643    if (accel_state->vsync)
644	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
645
646    if (accel_state->same_surface &&
647	    (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
648	R600DoPrepareCopy(pScrn);
649	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
650	R600DoCopyVline(pDst);
651    } else if (accel_state->same_surface && accel_state->copy_area) {
652	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
653	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
654	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
655	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
656	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
657	int orig_rop = accel_state->rop;
658
659	/* src to tmp */
660	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
661	accel_state->dst_obj.bo = accel_state->copy_area_bo;
662	accel_state->dst_obj.tiling_flags = 0;
663	accel_state->rop = 3;
664	R600DoPrepareCopy(pScrn);
665	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
666	R600DoCopy(pScrn);
667
668	/* tmp to dst */
669	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
670	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
671	accel_state->src_obj[0].tiling_flags = 0;
672	accel_state->dst_obj.domain = orig_dst_domain;
673	accel_state->dst_obj.bo = orig_bo;
674	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
675	accel_state->rop = orig_rop;
676	R600DoPrepareCopy(pScrn);
677	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
678	R600DoCopyVline(pDst);
679
680	/* restore state */
681	accel_state->src_obj[0].domain = orig_src_domain;
682	accel_state->src_obj[0].bo = orig_bo;
683	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
684    } else
685	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
686
687}
688
/*
 * One entry of the blend table R600BlendOp, which R600GetBlendCntl indexes
 * by its op argument.
 */
struct blendinfo {
    /* Non-zero when the source blend factor reads destination alpha;
     * R600GetBlendCntl rewrites that factor if the destination format has
     * no alpha bits. */
    Bool dst_alpha;
    /* Non-zero when the destination blend factor reads source alpha;
     * R600GetBlendCntl switches it to the colour variant for
     * component-alpha masks. */
    Bool src_alpha;
    /* Source and destination blend factors, positioned with
     * COLOR_SRCBLEND_shift and COLOR_DESTBLEND_shift. */
    uint32_t blend_cntl;
};
694
695static struct blendinfo R600BlendOp[] = {
696    /* Clear */
697    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
698    /* Src */
699    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
700    /* Dst */
701    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
702    /* Over */
703    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
704    /* OverReverse */
705    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
706    /* In */
707    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
708    /* InReverse */
709    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
710    /* Out */
711    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
712    /* OutReverse */
713    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
714    /* Atop */
715    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
716    /* AtopReverse */
717    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
718    /* Xor */
719    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
720    /* Add */
721    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
722};
723
/* Maps one picture format to the texture format programmed into the card. */
struct formatinfo {
    /* Picture format (PICT_*), compared against a picture's format field. */
    unsigned int fmt;
    /* Hardware texture format (FMT_*), written to tex_res.format. */
    uint32_t card_fmt;
};
728
/*
 * Picture formats accepted as textures and the hardware format used for
 * each.  The table is searched linearly for an exact fmt match; a format
 * that is not listed makes R600CheckCompositeTexture fall back.  Several
 * picture formats share one hardware format; the channel order is handled
 * separately through component swizzles in R600TextureSetup.
 */
static struct formatinfo R600TexFormats[] = {
    {PICT_a2r10g10b10,	FMT_2_10_10_10},
    {PICT_x2r10g10b10,	FMT_2_10_10_10},
    {PICT_a2b10g10r10,	FMT_2_10_10_10},
    {PICT_x2b10g10r10,	FMT_2_10_10_10},
    {PICT_a8r8g8b8,	FMT_8_8_8_8},
    {PICT_x8r8g8b8,	FMT_8_8_8_8},
    {PICT_a8b8g8r8,	FMT_8_8_8_8},
    {PICT_x8b8g8r8,	FMT_8_8_8_8},
    {PICT_b8g8r8a8,	FMT_8_8_8_8},
    {PICT_b8g8r8x8,	FMT_8_8_8_8},
    {PICT_r5g6b5,	FMT_5_6_5},
    {PICT_a1r5g5b5,	FMT_1_5_5_5},
    {PICT_x1r5g5b5,     FMT_1_5_5_5},
    {PICT_a8,		FMT_8},
};
745
746static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
747{
748    uint32_t sblend, dblend;
749
750    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
751    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
752
753    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
754     * it as always 1.
755     */
756    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
757	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
758	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
759	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
760	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
761    }
762
763    /* If the source alpha is being used, then we should only be in a case where
764     * the source blend factor is 0, and the source blend value is the mask
765     * channels multiplied by the source picture's alpha.
766     */
767    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
768	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
769	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
770	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
771	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
772	}
773
774	/* With some tricks, we can still accelerate PictOpOver with solid src.
775	 * This is commonly used for text rendering, so it's worth the extra
776	 * effort.
777	 */
778	if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) {
779	    sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);
780	}
781    }
782
783    return sblend | dblend;
784}
785
786static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
787{
788    switch (pDstPicture->format) {
789    case PICT_a2r10g10b10:
790    case PICT_x2r10g10b10:
791    case PICT_a2b10g10r10:
792    case PICT_x2b10g10r10:
793	*dst_format = COLOR_2_10_10_10;
794	break;
795    case PICT_a8r8g8b8:
796    case PICT_x8r8g8b8:
797    case PICT_a8b8g8r8:
798    case PICT_x8b8g8r8:
799    case PICT_b8g8r8a8:
800    case PICT_b8g8r8x8:
801	*dst_format = COLOR_8_8_8_8;
802	break;
803    case PICT_r5g6b5:
804	*dst_format = COLOR_5_6_5;
805	break;
806    case PICT_a1r5g5b5:
807    case PICT_x1r5g5b5:
808	*dst_format = COLOR_1_5_5_5;
809	break;
810    case PICT_a8:
811	*dst_format = COLOR_8;
812	break;
813    default:
814	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
815	       (int)pDstPicture->format));
816    }
817    return TRUE;
818}
819
820static Bool R600CheckCompositeTexture(PicturePtr pPict,
821				      PicturePtr pDstPict,
822				      int op,
823				      int unit)
824{
825    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
826    unsigned int i;
827
828    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
829	if (R600TexFormats[i].fmt == pPict->format)
830	    break;
831    }
832    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
833	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
834			 (int)pPict->format));
835
836    if (pPict->filter != PictFilterNearest &&
837	pPict->filter != PictFilterBilinear)
838	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
839
840    /* for REPEAT_NONE, Render semantics are that sampling outside the source
841     * picture results in alpha=0 pixels. We can implement this with a border color
842     * *if* our source texture has an alpha channel, otherwise we need to fall
843     * back. If we're not transformed then we hope that upper layers have clipped
844     * rendering to the bounds of the source drawable, in which case it doesn't
845     * matter. I have not, however, verified that the X server always does such
846     * clipping.
847     */
848    /* FIXME R6xx */
849    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
850	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
851	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
852    }
853
854    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
855	RADEON_FALLBACK(("non-affine transforms not supported\n"));
856
857    return TRUE;
858}
859
860static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
861					int unit)
862{
863    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
864    RADEONInfoPtr info = RADEONPTR(pScrn);
865    struct radeon_accel_state *accel_state = info->accel_state;
866    unsigned int repeatType;
867    unsigned int i;
868    tex_resource_t  tex_res;
869    tex_sampler_t   tex_samp;
870    int pix_r, pix_g, pix_b, pix_a;
871    float vs_alu_consts[8];
872
873    CLEAR (tex_res);
874    CLEAR (tex_samp);
875
876    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
877	if (R600TexFormats[i].fmt == pPict->format)
878	    break;
879    }
880
881    /* Texture */
882    if (pPict->pDrawable) {
883	tex_res.w               = pPict->pDrawable->width;
884	tex_res.h               = pPict->pDrawable->height;
885	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
886    } else {
887	tex_res.w               = 1;
888	tex_res.h               = 1;
889	repeatType              = RepeatNormal;
890    }
891    tex_res.id                  = unit;
892    tex_res.pitch               = accel_state->src_obj[unit].pitch;
893    tex_res.depth               = 0;
894    tex_res.dim                 = SQ_TEX_DIM_2D;
895    tex_res.base                = 0;
896    tex_res.mip_base            = 0;
897    tex_res.size                = accel_state->src_size[unit];
898    tex_res.format              = R600TexFormats[i].card_fmt;
899    tex_res.bo                  = accel_state->src_obj[unit].bo;
900    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
901    tex_res.surface             = accel_state->src_obj[unit].surface;
902    tex_res.request_size        = 1;
903
904#if X_BYTE_ORDER == X_BIG_ENDIAN
905    switch (accel_state->src_obj[unit].bpp) {
906    case 16:
907	tex_res.endian = SQ_ENDIAN_8IN16;
908	break;
909    case 32:
910	tex_res.endian = SQ_ENDIAN_8IN32;
911	break;
912    default :
913	break;
914    }
915#endif
916
917    /* component swizzles */
918    switch (pPict->format) {
919    case PICT_a2r10g10b10:
920    case PICT_a1r5g5b5:
921    case PICT_a8r8g8b8:
922	pix_r = SQ_SEL_Z; /* R */
923	pix_g = SQ_SEL_Y; /* G */
924	pix_b = SQ_SEL_X; /* B */
925	pix_a = SQ_SEL_W; /* A */
926	break;
927    case PICT_a2b10g10r10:
928    case PICT_a8b8g8r8:
929	pix_r = SQ_SEL_X; /* R */
930	pix_g = SQ_SEL_Y; /* G */
931	pix_b = SQ_SEL_Z; /* B */
932	pix_a = SQ_SEL_W; /* A */
933	break;
934    case PICT_x2b10g10r10:
935    case PICT_x8b8g8r8:
936	pix_r = SQ_SEL_X; /* R */
937	pix_g = SQ_SEL_Y; /* G */
938	pix_b = SQ_SEL_Z; /* B */
939	pix_a = SQ_SEL_1; /* A */
940	break;
941    case PICT_b8g8r8a8:
942	pix_r = SQ_SEL_Y; /* R */
943	pix_g = SQ_SEL_Z; /* G */
944	pix_b = SQ_SEL_W; /* B */
945	pix_a = SQ_SEL_X; /* A */
946	break;
947    case PICT_b8g8r8x8:
948	pix_r = SQ_SEL_Y; /* R */
949	pix_g = SQ_SEL_Z; /* G */
950	pix_b = SQ_SEL_W; /* B */
951	pix_a = SQ_SEL_1; /* A */
952	break;
953    case PICT_x2r10g10b10:
954    case PICT_x1r5g5b5:
955    case PICT_x8r8g8b8:
956    case PICT_r5g6b5:
957	pix_r = SQ_SEL_Z; /* R */
958	pix_g = SQ_SEL_Y; /* G */
959	pix_b = SQ_SEL_X; /* B */
960	pix_a = SQ_SEL_1; /* A */
961	break;
962    case PICT_a8:
963	pix_r = SQ_SEL_0; /* R */
964	pix_g = SQ_SEL_0; /* G */
965	pix_b = SQ_SEL_0; /* B */
966	pix_a = SQ_SEL_X; /* A */
967	break;
968    default:
969	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
970    }
971
972    if (unit == 0) {
973	if (!accel_state->msk_pic) {
974	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
975		pix_r = SQ_SEL_0;
976		pix_g = SQ_SEL_0;
977		pix_b = SQ_SEL_0;
978	    }
979
980	    if (PICT_FORMAT_A(pPict->format) == 0)
981		pix_a = SQ_SEL_1;
982	} else {
983	    if (accel_state->component_alpha) {
984		if (accel_state->src_alpha) {
985		    if (PICT_FORMAT_A(pPict->format) == 0) {
986			pix_r = SQ_SEL_1;
987			pix_g = SQ_SEL_1;
988			pix_b = SQ_SEL_1;
989			pix_a = SQ_SEL_1;
990		    } else {
991			pix_r = pix_a;
992			pix_g = pix_a;
993			pix_b = pix_a;
994		    }
995		} else {
996		    if (PICT_FORMAT_A(pPict->format) == 0)
997			pix_a = SQ_SEL_1;
998		}
999	    } else {
1000		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1001		    pix_r = SQ_SEL_0;
1002		    pix_g = SQ_SEL_0;
1003		    pix_b = SQ_SEL_0;
1004		}
1005
1006		if (PICT_FORMAT_A(pPict->format) == 0)
1007		    pix_a = SQ_SEL_1;
1008	    }
1009	}
1010    } else {
1011	if (accel_state->component_alpha) {
1012	    if (PICT_FORMAT_A(pPict->format) == 0)
1013		pix_a = SQ_SEL_1;
1014	} else {
1015	    if (PICT_FORMAT_A(pPict->format) == 0) {
1016		pix_r = SQ_SEL_1;
1017		pix_g = SQ_SEL_1;
1018		pix_b = SQ_SEL_1;
1019		pix_a = SQ_SEL_1;
1020	    } else {
1021		pix_r = pix_a;
1022		pix_g = pix_a;
1023		pix_b = pix_a;
1024	    }
1025	}
1026    }
1027
1028    tex_res.dst_sel_x           = pix_r; /* R */
1029    tex_res.dst_sel_y           = pix_g; /* G */
1030    tex_res.dst_sel_z           = pix_b; /* B */
1031    tex_res.dst_sel_w           = pix_a; /* A */
1032
1033    tex_res.base_level          = 0;
1034    tex_res.last_level          = 0;
1035    tex_res.perf_modulation     = 0;
1036    if (accel_state->src_obj[unit].tiling_flags == 0)
1037	tex_res.tile_mode           = 1;
1038    r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain);
1039
1040    tex_samp.id                 = unit;
1041    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1042
1043    switch (repeatType) {
1044    case RepeatNormal:
1045	tex_samp.clamp_x            = SQ_TEX_WRAP;
1046	tex_samp.clamp_y            = SQ_TEX_WRAP;
1047	break;
1048    case RepeatPad:
1049	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1050	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1051	break;
1052    case RepeatReflect:
1053	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1054	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1055	break;
1056    case RepeatNone:
1057	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1058	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1059	break;
1060    default:
1061	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1062    }
1063
1064    switch (pPict->filter) {
1065    case PictFilterNearest:
1066	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1067	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1068	tex_samp.mc_coord_truncate  = 1;
1069	break;
1070    case PictFilterBilinear:
1071	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1072	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1073	break;
1074    default:
1075	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1076    }
1077
1078    tex_samp.clamp_z            = SQ_TEX_WRAP;
1079    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1080    tex_samp.mip_filter         = 0;			/* no mipmap */
1081    r600_set_tex_sampler(pScrn, &tex_samp);
1082
1083    if (pPict->transform != 0) {
1084	accel_state->is_transform[unit] = TRUE;
1085	accel_state->transform[unit] = pPict->transform;
1086
1087	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1088	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1089	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1090	vs_alu_consts[3] = 1.0 / tex_res.w;
1091
1092	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1093	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1094	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1095	vs_alu_consts[7] = 1.0 / tex_res.h;
1096    } else {
1097	accel_state->is_transform[unit] = FALSE;
1098
1099	vs_alu_consts[0] = 1.0;
1100	vs_alu_consts[1] = 0.0;
1101	vs_alu_consts[2] = 0.0;
1102	vs_alu_consts[3] = 1.0 / tex_res.w;
1103
1104	vs_alu_consts[4] = 0.0;
1105	vs_alu_consts[5] = 1.0;
1106	vs_alu_consts[6] = 0.0;
1107	vs_alu_consts[7] = 1.0 / tex_res.h;
1108    }
1109
1110    /* VS alu constants */
1111    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2),
1112			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1113
1114    return TRUE;
1115}
1116
1117static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1118			       PicturePtr pDstPicture)
1119{
1120    uint32_t tmp1;
1121    PixmapPtr pSrcPixmap, pDstPixmap;
1122
1123    /* Check for unsupported compositing operations. */
1124    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1125	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1126
1127    if (pSrcPicture->pDrawable) {
1128	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1129
1130	if (pSrcPixmap->drawable.width >= 8192 ||
1131	    pSrcPixmap->drawable.height >= 8192) {
1132	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1133			     pSrcPixmap->drawable.width,
1134			     pSrcPixmap->drawable.height));
1135	}
1136
1137	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1138	    return FALSE;
1139    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1140	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1141
1142    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1143
1144    if (pDstPixmap->drawable.width >= 8192 ||
1145	pDstPixmap->drawable.height >= 8192) {
1146	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1147			 pDstPixmap->drawable.width,
1148			 pDstPixmap->drawable.height));
1149    }
1150
1151    if (pMaskPicture) {
1152	PixmapPtr pMaskPixmap;
1153
1154	if (pMaskPicture->pDrawable) {
1155	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1156
1157	    if (pMaskPixmap->drawable.width >= 8192 ||
1158		pMaskPixmap->drawable.height >= 8192) {
1159	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1160			       pMaskPixmap->drawable.width,
1161			       pMaskPixmap->drawable.height));
1162	    }
1163
1164	    if (pMaskPicture->componentAlpha) {
1165		/* Check if it's component alpha that relies on a source alpha and
1166		 * on the source value.  We can only get one of those into the
1167		 * single source value that we get to blend with.
1168		 *
1169		 * We can cheat a bit if the src is solid, though. PictOpOver
1170		 * can use the constant blend color to sneak a second blend
1171		 * source in.
1172		 */
1173		if (R600BlendOp[op].src_alpha &&
1174		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1175		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1176		    if (pSrcPicture->pDrawable || op != PictOpOver)
1177			RADEON_FALLBACK(("Component alpha not supported with source "
1178					 "alpha and source value blending.\n"));
1179		}
1180	    }
1181
1182	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1183		return FALSE;
1184	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1185	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1186    }
1187
1188    if (!R600GetDestFormat(pDstPicture, &tmp1))
1189	return FALSE;
1190
1191    return TRUE;
1192
1193}
1194
1195static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
1196{
1197    RADEONInfoPtr info = RADEONPTR(pScrn);
1198    struct radeon_accel_state *accel_state = info->accel_state;
1199    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
1200
1201    uint32_t w = (fg >> 24) & 0xff;
1202    uint32_t z = (fg >> 16) & 0xff;
1203    uint32_t y = (fg >> 8) & 0xff;
1204    uint32_t x = (fg >> 0) & 0xff;
1205    float xf = (float)x / 255; /* R */
1206    float yf = (float)y / 255; /* G */
1207    float zf = (float)z / 255; /* B */
1208    float wf = (float)w / 255; /* A */
1209
1210    /* component swizzles */
1211    switch (format) {
1212	case PICT_a1r5g5b5:
1213	case PICT_a8r8g8b8:
1214	    pix_r = zf; /* R */
1215	    pix_g = yf; /* G */
1216	    pix_b = xf; /* B */
1217	    pix_a = wf; /* A */
1218	    break;
1219	case PICT_a8b8g8r8:
1220	    pix_r = xf; /* R */
1221	    pix_g = yf; /* G */
1222	    pix_b = zf; /* B */
1223	    pix_a = wf; /* A */
1224	    break;
1225	case PICT_x8b8g8r8:
1226	    pix_r = xf; /* R */
1227	    pix_g = yf; /* G */
1228	    pix_b = zf; /* B */
1229	    pix_a = 1.0; /* A */
1230	    break;
1231	case PICT_b8g8r8a8:
1232	    pix_r = yf; /* R */
1233	    pix_g = zf; /* G */
1234	    pix_b = wf; /* B */
1235	    pix_a = xf; /* A */
1236	    break;
1237	case PICT_b8g8r8x8:
1238	    pix_r = yf; /* R */
1239	    pix_g = zf; /* G */
1240	    pix_b = wf; /* B */
1241	    pix_a = 1.0; /* A */
1242	    break;
1243	case PICT_x1r5g5b5:
1244	case PICT_x8r8g8b8:
1245	case PICT_r5g6b5:
1246	    pix_r = zf; /* R */
1247	    pix_g = yf; /* G */
1248	    pix_b = xf; /* B */
1249	    pix_a = 1.0; /* A */
1250	    break;
1251	case PICT_a8:
1252	    pix_r = 0.0; /* R */
1253	    pix_g = 0.0; /* G */
1254	    pix_b = 0.0; /* B */
1255	    pix_a = xf; /* A */
1256	    break;
1257	default:
1258	    ErrorF("Bad format 0x%x\n", format);
1259    }
1260
1261    if (unit == 0) {
1262	if (!accel_state->msk_pic) {
1263	    if (PICT_FORMAT_RGB(format) == 0) {
1264		pix_r = 0.0;
1265		pix_g = 0.0;
1266		pix_b = 0.0;
1267	    }
1268
1269	    if (PICT_FORMAT_A(format) == 0)
1270		pix_a = 1.0;
1271	} else {
1272	    if (accel_state->component_alpha) {
1273		if (accel_state->src_alpha) {
1274		    /* required for PictOpOver */
1275		    float cblend[4] = { pix_r / pix_a, pix_g / pix_a,
1276					pix_b / pix_a, pix_a / pix_a };
1277		    r600_set_blend_color(pScrn, cblend);
1278
1279		    if (PICT_FORMAT_A(format) == 0) {
1280			pix_r = 1.0;
1281			pix_g = 1.0;
1282			pix_b = 1.0;
1283			pix_a = 1.0;
1284		    } else {
1285			pix_r = pix_a;
1286			pix_g = pix_a;
1287			pix_b = pix_a;
1288		    }
1289		} else {
1290		    if (PICT_FORMAT_A(format) == 0)
1291			pix_a = 1.0;
1292		}
1293	    } else {
1294		if (PICT_FORMAT_RGB(format) == 0) {
1295		    pix_r = 0;
1296		    pix_g = 0;
1297		    pix_b = 0;
1298		}
1299
1300		if (PICT_FORMAT_A(format) == 0)
1301		    pix_a = 1.0;
1302	    }
1303	}
1304    } else {
1305	if (accel_state->component_alpha) {
1306	    if (PICT_FORMAT_A(format) == 0)
1307		pix_a = 1.0;
1308	} else {
1309	    if (PICT_FORMAT_A(format) == 0) {
1310		pix_r = 1.0;
1311		pix_g = 1.0;
1312		pix_b = 1.0;
1313		pix_a = 1.0;
1314	    } else {
1315		pix_r = pix_a;
1316		pix_g = pix_a;
1317		pix_b = pix_a;
1318	    }
1319	}
1320    }
1321
1322    buf[0] = pix_r;
1323    buf[1] = pix_g;
1324    buf[2] = pix_b;
1325    buf[3] = pix_a;
1326}
1327
1328static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1329				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1330				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1331{
1332    ScreenPtr pScreen = pDst->drawable.pScreen;
1333    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1334    RADEONInfoPtr info = RADEONPTR(pScrn);
1335    struct radeon_accel_state *accel_state = info->accel_state;
1336    uint32_t dst_format;
1337    cb_config_t cb_conf;
1338    shader_config_t vs_conf, ps_conf;
1339    struct r600_accel_object src_obj, mask_obj, dst_obj;
1340    uint32_t ps_bool_consts = 0;
1341    float ps_alu_consts[8];
1342
1343    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1344	return FALSE;
1345
1346    if (pSrc) {
1347	src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
1348	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1349	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1350	src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1351	src_obj.width = pSrc->drawable.width;
1352	src_obj.height = pSrc->drawable.height;
1353	src_obj.bpp = pSrc->drawable.bitsPerPixel;
1354	src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1355    }
1356
1357    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
1358    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1359    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1360    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1361    dst_obj.width = pDst->drawable.width;
1362    dst_obj.height = pDst->drawable.height;
1363    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1364    if (radeon_get_pixmap_shared(pDst) == TRUE)
1365	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1366    else
1367	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1368
1369    if (pMaskPicture) {
1370	if (pMask) {
1371	    mask_obj.bo = radeon_get_pixmap_bo(pMask)->bo.radeon;
1372	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1373	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1374	    mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1375	    mask_obj.width = pMask->drawable.width;
1376	    mask_obj.height = pMask->drawable.height;
1377	    mask_obj.bpp = pMask->drawable.bitsPerPixel;
1378	    mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1379	}
1380
1381	accel_state->msk_pic = pMaskPicture;
1382	if (pMaskPicture->componentAlpha) {
1383	    accel_state->component_alpha = TRUE;
1384	    if (R600BlendOp[op].src_alpha)
1385		accel_state->src_alpha = TRUE;
1386	    else
1387		accel_state->src_alpha = FALSE;
1388	} else {
1389	    accel_state->component_alpha = FALSE;
1390	    accel_state->src_alpha = FALSE;
1391	}
1392    } else {
1393	accel_state->msk_pic = NULL;
1394	accel_state->component_alpha = FALSE;
1395	accel_state->src_alpha = FALSE;
1396    }
1397
1398    if (!R600SetAccelState(pScrn,
1399			   pSrc ? &src_obj : NULL,
1400			   (pMaskPicture && pMask) ? &mask_obj : NULL,
1401			   &dst_obj,
1402			   accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1403			   3, 0xffffffff))
1404	return FALSE;
1405
1406    if (!R600GetDestFormat(pDstPicture, &dst_format))
1407	return FALSE;
1408
1409    CLEAR (cb_conf);
1410    CLEAR (vs_conf);
1411    CLEAR (ps_conf);
1412
1413    if (pMask)
1414        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1415    else
1416        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1417
1418    radeon_cp_start(pScrn);
1419
1420    r600_set_default_state(pScrn);
1421
1422    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1423    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1424    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1425
1426    if (pSrc) {
1427        if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1428            R600IBDiscard(pScrn);
1429            return FALSE;
1430        }
1431    } else
1432        accel_state->is_transform[0] = FALSE;
1433
1434    if (pMask) {
1435        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1436            R600IBDiscard(pScrn);
1437            return FALSE;
1438        }
1439    } else
1440        accel_state->is_transform[1] = FALSE;
1441
1442    if (pSrc)
1443	ps_bool_consts |= (1 << 0);
1444    if (pMask)
1445	ps_bool_consts |= (1 << 1);
1446    r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
1447
1448    if (pMask) {
1449	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1450    } else {
1451	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1452    }
1453
1454    /* Shader */
1455    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1456    vs_conf.shader_size         = accel_state->vs_size;
1457    vs_conf.num_gprs            = 5;
1458    vs_conf.stack_size          = 1;
1459    vs_conf.bo                  = accel_state->shaders_bo;
1460    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1461
1462    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1463    ps_conf.shader_size         = accel_state->ps_size;
1464    ps_conf.num_gprs            = 2;
1465    ps_conf.stack_size          = 1;
1466    ps_conf.uncached_first_inst = 1;
1467    ps_conf.clamp_consts        = 0;
1468    ps_conf.export_mode         = 2;
1469    ps_conf.bo                  = accel_state->shaders_bo;
1470    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1471
1472    cb_conf.id = 0;
1473    cb_conf.w = accel_state->dst_obj.pitch;
1474    cb_conf.h = accel_state->dst_obj.height;
1475    cb_conf.base = 0;
1476    cb_conf.format = dst_format;
1477    cb_conf.bo = accel_state->dst_obj.bo;
1478    cb_conf.surface = accel_state->dst_obj.surface;
1479
1480    switch (pDstPicture->format) {
1481    case PICT_a2r10g10b10:
1482    case PICT_x2r10g10b10:
1483    case PICT_a8r8g8b8:
1484    case PICT_x8r8g8b8:
1485    case PICT_a1r5g5b5:
1486    case PICT_x1r5g5b5:
1487    default:
1488	cb_conf.comp_swap = 1; /* ARGB */
1489	break;
1490    case PICT_a2b10g10r10:
1491    case PICT_x2b10g10r10:
1492    case PICT_a8b8g8r8:
1493    case PICT_x8b8g8r8:
1494	cb_conf.comp_swap = 0; /* ABGR */
1495	break;
1496    case PICT_b8g8r8a8:
1497    case PICT_b8g8r8x8:
1498	cb_conf.comp_swap = 3; /* BGRA */
1499	break;
1500    case PICT_r5g6b5:
1501	cb_conf.comp_swap = 2; /* RGB */
1502	break;
1503    case PICT_a8:
1504	cb_conf.comp_swap = 3; /* A */
1505	break;
1506    }
1507    cb_conf.source_format = 1;
1508    cb_conf.blend_clamp = 1;
1509    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1510    cb_conf.blend_enable = 1;
1511    cb_conf.pmask = 0xf;
1512    cb_conf.rop = 3;
1513    if (accel_state->dst_obj.tiling_flags == 0)
1514	cb_conf.array_mode = 0;
1515#if X_BYTE_ORDER == X_BIG_ENDIAN
1516    switch (dst_obj.bpp) {
1517    case 16:
1518	cb_conf.endian = ENDIAN_8IN16;
1519	break;
1520    case 32:
1521	cb_conf.endian = ENDIAN_8IN32;
1522	break;
1523    default:
1524	break;
1525    }
1526#endif
1527    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1528
1529    if (pMask)
1530	r600_set_spi(pScrn, (2 - 1), 2);
1531    else
1532	r600_set_spi(pScrn, (1 - 1), 1);
1533
1534    if (!pSrc) {
1535	/* solid src color */
1536	R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
1537			   pSrcPicture->pSourcePict->solidFill.color, 0);
1538    }
1539
1540    if (!pMaskPicture) {
1541	/* use identity constant if there is no mask */
1542	ps_alu_consts[4] = 1.0;
1543	ps_alu_consts[5] = 1.0;
1544	ps_alu_consts[6] = 1.0;
1545	ps_alu_consts[7] = 1.0;
1546    } else if (!pMask) {
1547	/* solid mask color */
1548	R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
1549			   pMaskPicture->pSourcePict->solidFill.color, 1);
1550    }
1551
1552    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
1553			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
1554
1555    if (accel_state->vsync)
1556	RADEONVlineHelperClear(pScrn);
1557
1558    accel_state->composite_op = op;
1559    accel_state->dst_pic = pDstPicture;
1560    accel_state->src_pic = pSrcPicture;
1561    accel_state->dst_pix = pDst;
1562    accel_state->msk_pix = pMask;
1563    accel_state->src_pix = pSrc;
1564
1565    return TRUE;
1566}
1567
1568static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1569				struct radeon_accel_state *accel_state)
1570{
1571    int vtx_size;
1572
1573    if (accel_state->vsync)
1574       r600_cp_wait_vline_sync(pScrn, pDst,
1575			       accel_state->vline_crtc,
1576			       accel_state->vline_y1,
1577			       accel_state->vline_y2);
1578
1579    vtx_size = accel_state->msk_pix ? 24 : 16;
1580
1581    r600_finish_op(pScrn, vtx_size);
1582}
1583
1584static void R600DoneComposite(PixmapPtr pDst)
1585{
1586    ScreenPtr pScreen = pDst->drawable.pScreen;
1587    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1588    RADEONInfoPtr info = RADEONPTR(pScrn);
1589    struct radeon_accel_state *accel_state = info->accel_state;
1590
1591    R600FinishComposite(pScrn, pDst, accel_state);
1592}
1593
1594static void R600Composite(PixmapPtr pDst,
1595			  int srcX, int srcY,
1596			  int maskX, int maskY,
1597			  int dstX, int dstY,
1598			  int w, int h)
1599{
1600    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1601    RADEONInfoPtr info = RADEONPTR(pScrn);
1602    struct radeon_accel_state *accel_state = info->accel_state;
1603    float *vb;
1604
1605    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1606       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1607
1608    if (CS_FULL(info->cs)) {
1609	R600FinishComposite(pScrn, pDst, info->accel_state);
1610	radeon_cs_flush_indirect(pScrn);
1611	R600PrepareComposite(info->accel_state->composite_op,
1612			     info->accel_state->src_pic,
1613			     info->accel_state->msk_pic,
1614			     info->accel_state->dst_pic,
1615			     info->accel_state->src_pix,
1616			     info->accel_state->msk_pix,
1617			     info->accel_state->dst_pix);
1618    }
1619
1620    if (accel_state->vsync)
1621	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1622
1623    if (accel_state->msk_pix) {
1624
1625	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1626
1627	vb[0] = (float)dstX;
1628	vb[1] = (float)dstY;
1629	vb[2] = (float)srcX;
1630	vb[3] = (float)srcY;
1631	vb[4] = (float)maskX;
1632	vb[5] = (float)maskY;
1633
1634	vb[6] = (float)dstX;
1635	vb[7] = (float)(dstY + h);
1636	vb[8] = (float)srcX;
1637	vb[9] = (float)(srcY + h);
1638	vb[10] = (float)maskX;
1639	vb[11] = (float)(maskY + h);
1640
1641	vb[12] = (float)(dstX + w);
1642	vb[13] = (float)(dstY + h);
1643	vb[14] = (float)(srcX + w);
1644	vb[15] = (float)(srcY + h);
1645	vb[16] = (float)(maskX + w);
1646	vb[17] = (float)(maskY + h);
1647
1648	radeon_vbo_commit(pScrn, &accel_state->vbo);
1649
1650    } else {
1651
1652	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1653
1654	vb[0] = (float)dstX;
1655	vb[1] = (float)dstY;
1656	vb[2] = (float)srcX;
1657	vb[3] = (float)srcY;
1658
1659	vb[4] = (float)dstX;
1660	vb[5] = (float)(dstY + h);
1661	vb[6] = (float)srcX;
1662	vb[7] = (float)(srcY + h);
1663
1664	vb[8] = (float)(dstX + w);
1665	vb[9] = (float)(dstY + h);
1666	vb[10] = (float)(srcX + w);
1667	vb[11] = (float)(srcY + h);
1668
1669	radeon_vbo_commit(pScrn, &accel_state->vbo);
1670    }
1671
1672
1673}
1674
1675static Bool
1676R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1677		     char *src, int src_pitch)
1678{
1679    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1680    RADEONInfoPtr info = RADEONPTR(pScrn);
1681    struct radeon_accel_state *accel_state = info->accel_state;
1682    struct radeon_exa_pixmap_priv *driver_priv;
1683    struct radeon_bo *scratch = NULL;
1684    struct radeon_bo *copy_dst;
1685    unsigned char *dst;
1686    unsigned size;
1687    uint32_t dst_domain;
1688    int bpp = pDst->drawable.bitsPerPixel;
1689    uint32_t scratch_pitch;
1690    uint32_t copy_pitch;
1691    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1692    int ret;
1693    Bool flush = TRUE;
1694    Bool r;
1695    int i;
1696    struct r600_accel_object src_obj, dst_obj;
1697    uint32_t height, base_align;
1698
1699    if (bpp < 8)
1700	return FALSE;
1701
1702    driver_priv = exaGetPixmapDriverPrivate(pDst);
1703    if (!driver_priv || !driver_priv->bo->bo.radeon)
1704	return FALSE;
1705
1706    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
1707    copy_dst = driver_priv->bo->bo.radeon;
1708    copy_pitch = pDst->devKind;
1709    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1710	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
1711	    flush = FALSE;
1712	    if (!radeon_bo_is_busy(driver_priv->bo->bo.radeon, &dst_domain) &&
1713		!(dst_domain & RADEON_GEM_DOMAIN_VRAM))
1714		goto copy;
1715	}
1716	/* use cpu copy for fast fb access */
1717	if (info->is_fast_fb)
1718	    goto copy;
1719    }
1720
1721    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1722    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1723    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1724    size = scratch_pitch * height * (bpp / 8);
1725    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1726    if (!scratch) {
1727	goto copy;
1728    }
1729
1730    src_obj.pitch = scratch_pitch;
1731    src_obj.width = w;
1732    src_obj.height = h;
1733    src_obj.bpp = bpp;
1734    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1735    src_obj.bo = scratch;
1736    src_obj.tiling_flags = 0;
1737    src_obj.surface = NULL;
1738
1739    dst_obj.pitch = dst_pitch_hw;
1740    dst_obj.width = pDst->drawable.width;
1741    dst_obj.height = pDst->drawable.height;
1742    dst_obj.bpp = bpp;
1743    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1744    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
1745    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1746    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1747
1748    if (!R600SetAccelState(pScrn,
1749			   &src_obj,
1750			   NULL,
1751			   &dst_obj,
1752			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1753			   3, 0xffffffff)) {
1754        goto copy;
1755    }
1756    copy_dst = scratch;
1757    copy_pitch = scratch_pitch * (bpp / 8);
1758    flush = FALSE;
1759
1760copy:
1761    if (flush)
1762	radeon_cs_flush_indirect(pScrn);
1763
1764    ret = radeon_bo_map(copy_dst, 0);
1765    if (ret) {
1766        r = FALSE;
1767        goto out;
1768    }
1769    r = TRUE;
1770    size = w * bpp / 8;
1771    dst = copy_dst->ptr;
1772    if (copy_dst == driver_priv->bo->bo.radeon)
1773	dst += y * copy_pitch + x * bpp / 8;
1774    for (i = 0; i < h; i++) {
1775        memcpy(dst + i * copy_pitch, src, size);
1776        src += src_pitch;
1777    }
1778    radeon_bo_unmap(copy_dst);
1779
1780    if (copy_dst == scratch) {
1781	if (info->accel_state->vsync)
1782	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1783
1784	/* blit from gart to vram */
1785	R600DoPrepareCopy(pScrn);
1786	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1787	R600DoCopyVline(pDst);
1788    }
1789
1790out:
1791    if (scratch)
1792	radeon_bo_unref(scratch);
1793    return r;
1794}
1795
1796static Bool
1797R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1798			 int h, char *dst, int dst_pitch)
1799{
1800    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1801    RADEONInfoPtr info = RADEONPTR(pScrn);
1802    struct radeon_accel_state *accel_state = info->accel_state;
1803    struct radeon_exa_pixmap_priv *driver_priv;
1804    struct radeon_bo *scratch = NULL;
1805    struct radeon_bo *copy_src;
1806    unsigned size;
1807    uint32_t src_domain = 0;
1808    int bpp = pSrc->drawable.bitsPerPixel;
1809    uint32_t scratch_pitch;
1810    uint32_t copy_pitch;
1811    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1812    int ret;
1813    Bool flush = FALSE;
1814    Bool r;
1815    struct r600_accel_object src_obj, dst_obj;
1816    uint32_t height, base_align;
1817
1818    if (bpp < 8)
1819	return FALSE;
1820
1821    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1822    if (!driver_priv || !driver_priv->bo->bo.radeon)
1823	return FALSE;
1824
1825    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1826    copy_src = driver_priv->bo->bo.radeon;
1827    copy_pitch = pSrc->devKind;
1828    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1829	if (radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
1830	    src_domain = radeon_bo_get_src_domain(driver_priv->bo->bo.radeon);
1831	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1832		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1833		src_domain = 0;
1834	    else /* A write may be scheduled */
1835		flush = TRUE;
1836	}
1837
1838	if (!src_domain)
1839	    radeon_bo_is_busy(driver_priv->bo->bo.radeon, &src_domain);
1840
1841	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1842	    goto copy;
1843    }
1844
1845    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1846    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1847    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1848    size = scratch_pitch * height * (bpp / 8);
1849    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1850    if (!scratch) {
1851	goto copy;
1852    }
1853    radeon_cs_space_reset_bos(info->cs);
1854    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1855				      RADEON_GEM_DOMAIN_VRAM, 0);
1856    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1857    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1858    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1859    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1860    ret = radeon_cs_space_check(info->cs);
1861    if (ret) {
1862        goto copy;
1863    }
1864
1865    src_obj.pitch = src_pitch_hw;
1866    src_obj.width = pSrc->drawable.width;
1867    src_obj.height = pSrc->drawable.height;
1868    src_obj.bpp = bpp;
1869    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1870    src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
1871    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1872    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1873
1874    dst_obj.pitch = scratch_pitch;
1875    dst_obj.width = w;
1876    dst_obj.height = h;
1877    dst_obj.bo = scratch;
1878    dst_obj.bpp = bpp;
1879    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1880    dst_obj.tiling_flags = 0;
1881    dst_obj.surface = NULL;
1882
1883    if (!R600SetAccelState(pScrn,
1884			   &src_obj,
1885			   NULL,
1886			   &dst_obj,
1887			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1888			   3, 0xffffffff)) {
1889        goto copy;
1890    }
1891
1892    /* blit from vram to gart */
1893    R600DoPrepareCopy(pScrn);
1894    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1895    R600DoCopy(pScrn);
1896    copy_src = scratch;
1897    copy_pitch = scratch_pitch * (bpp / 8);
1898    flush = TRUE;
1899
1900copy:
1901    if (flush)
1902	radeon_cs_flush_indirect(pScrn);
1903
1904    ret = radeon_bo_map(copy_src, 0);
1905    if (ret) {
1906	ErrorF("failed to map pixmap: %d\n", ret);
1907        r = FALSE;
1908        goto out;
1909    }
1910    r = TRUE;
1911    w *= bpp / 8;
1912    if (copy_src == driver_priv->bo->bo.radeon)
1913	size = y * copy_pitch + x * bpp / 8;
1914    else
1915	size = 0;
1916    while (h--) {
1917        memcpy(dst, copy_src->ptr + size, w);
1918        size += copy_pitch;
1919        dst += dst_pitch;
1920    }
1921    radeon_bo_unmap(copy_src);
1922out:
1923    if (scratch)
1924	radeon_bo_unref(scratch);
1925    return r;
1926}
1927
1928static int
1929R600MarkSync(ScreenPtr pScreen)
1930{
1931    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1932    RADEONInfoPtr info = RADEONPTR(pScrn);
1933    struct radeon_accel_state *accel_state = info->accel_state;
1934
1935    return ++accel_state->exaSyncMarker;
1936
1937}
1938
1939static void
1940R600Sync(ScreenPtr pScreen, int marker)
1941{
1942    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1943    RADEONInfoPtr info = RADEONPTR(pScrn);
1944    struct radeon_accel_state *accel_state = info->accel_state;
1945
1946    if (accel_state->exaMarkerSynced != marker) {
1947	accel_state->exaMarkerSynced = marker;
1948    }
1949
1950}
1951
1952static Bool
1953R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1954{
1955    RADEONInfoPtr info = RADEONPTR(pScrn);
1956    struct radeon_accel_state *accel_state = info->accel_state;
1957
1958    /* 512 bytes per shader for now */
1959    int size = 512 * 9;
1960
1961    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
1962					     RADEON_GEM_DOMAIN_VRAM, 0);
1963    if (!accel_state->shaders_bo) {
1964        ErrorF("Allocating shader failed\n");
1965	return FALSE;
1966    }
1967    return TRUE;
1968}
1969
1970Bool
1971R600LoadShaders(ScrnInfoPtr pScrn)
1972{
1973    RADEONInfoPtr info = RADEONPTR(pScrn);
1974    struct radeon_accel_state *accel_state = info->accel_state;
1975    RADEONChipFamily ChipSet = info->ChipFamily;
1976    uint32_t *shader;
1977    int ret;
1978
1979    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1980    if (ret) {
1981        FatalError("failed to map shader %d\n", ret);
1982	return FALSE;
1983    }
1984    shader = accel_state->shaders_bo->ptr;
1985
1986    /*  solid vs --------------------------------------- */
1987    accel_state->solid_vs_offset = 0;
1988    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1989
1990    /*  solid ps --------------------------------------- */
1991    accel_state->solid_ps_offset = 512;
1992    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1993
1994    /*  copy vs --------------------------------------- */
1995    accel_state->copy_vs_offset = 1024;
1996    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1997
1998    /*  copy ps --------------------------------------- */
1999    accel_state->copy_ps_offset = 1536;
2000    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2001
2002    /*  comp vs --------------------------------------- */
2003    accel_state->comp_vs_offset = 2048;
2004    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2005
2006    /*  comp ps --------------------------------------- */
2007    accel_state->comp_ps_offset = 2560;
2008    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2009
2010    /*  xv vs --------------------------------------- */
2011    accel_state->xv_vs_offset = 3072;
2012    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2013
2014    /*  xv ps --------------------------------------- */
2015    accel_state->xv_ps_offset = 3584;
2016    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2017
2018    radeon_bo_unmap(accel_state->shaders_bo);
2019    return TRUE;
2020}
2021
2022Bool
2023R600DrawInit(ScreenPtr pScreen)
2024{
2025    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2026    RADEONInfoPtr info   = RADEONPTR(pScrn);
2027
2028    if (!info->accel_state->exa) {
2029	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2030	return FALSE;
2031    }
2032
2033    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2034    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2035
2036    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2037    info->accel_state->exa->Solid = R600Solid;
2038    info->accel_state->exa->DoneSolid = R600DoneSolid;
2039
2040    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2041    info->accel_state->exa->Copy = R600Copy;
2042    info->accel_state->exa->DoneCopy = R600DoneCopy;
2043
2044    info->accel_state->exa->MarkSync = R600MarkSync;
2045    info->accel_state->exa->WaitMarker = R600Sync;
2046
2047    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2048    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2049    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2050    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2051    info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2052    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2053    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2054    info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
2055    info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
2056    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
2057	EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
2058    info->accel_state->exa->pixmapOffsetAlign = 256;
2059    info->accel_state->exa->pixmapPitchAlign = 256;
2060
2061    info->accel_state->exa->CheckComposite = R600CheckComposite;
2062    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2063    info->accel_state->exa->Composite = R600Composite;
2064    info->accel_state->exa->DoneComposite = R600DoneComposite;
2065
2066    info->accel_state->exa->maxPitchBytes = 32768;
2067    info->accel_state->exa->maxX = 8192;
2068    info->accel_state->exa->maxY = 8192;
2069
2070    /* not supported yet */
2071    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2072	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2073	info->accel_state->vsync = TRUE;
2074    } else
2075	info->accel_state->vsync = FALSE;
2076
2077    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2078	free(info->accel_state->exa);
2079	return FALSE;
2080    }
2081
2082    info->accel_state->XInited3D = FALSE;
2083    info->accel_state->src_obj[0].bo = NULL;
2084    info->accel_state->src_obj[1].bo = NULL;
2085    info->accel_state->dst_obj.bo = NULL;
2086    info->accel_state->copy_area_bo = NULL;
2087    info->accel_state->vbo.vb_start_op = -1;
2088    info->accel_state->finish_op = r600_finish_op;
2089    info->accel_state->vbo.verts_per_op = 3;
2090    RADEONVlineHelperClear(pScrn);
2091
2092    radeon_vbo_init_lists(pScrn);
2093
2094    if (!R600AllocShaders(pScrn, pScreen))
2095	return FALSE;
2096
2097    if (!R600LoadShaders(pScrn))
2098	return FALSE;
2099
2100    exaMarkSync(pScreen);
2101
2102    return TRUE;
2103
2104}
2105
2106