1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *          Matthias Hopf <mhopf@suse.de>
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "r600_shader.h"
36#include "radeon_reg.h"
37#include "r600_reg.h"
38#include "r600_state.h"
39
40#include "radeon_vbo.h"
41#include "radeon_exa_shared.h"
42
43static const uint32_t R600_ROP[16] = {
44    RADEON_ROP3_ZERO, /* GXclear        */
45    RADEON_ROP3_DSa,  /* Gxand          */
46    RADEON_ROP3_SDna, /* GXandReverse   */
47    RADEON_ROP3_S,    /* GXcopy         */
48    RADEON_ROP3_DSna, /* GXandInverted  */
49    RADEON_ROP3_D,    /* GXnoop         */
50    RADEON_ROP3_DSx,  /* GXxor          */
51    RADEON_ROP3_DSo,  /* GXor           */
52    RADEON_ROP3_DSon, /* GXnor          */
53    RADEON_ROP3_DSxn, /* GXequiv        */
54    RADEON_ROP3_Dn,   /* GXinvert       */
55    RADEON_ROP3_SDno, /* GXorReverse    */
56    RADEON_ROP3_Sn,   /* GXcopyInverted */
57    RADEON_ROP3_DSno, /* GXorInverted   */
58    RADEON_ROP3_DSan, /* GXnand         */
59    RADEON_ROP3_ONE,  /* GXset          */
60};
61
/* We try to batch operations together under KMS,
   but it doesn't work yet without misrendering. */
64#define KMS_MULTI_OP 1
65
66/* Flush the indirect buffer to the kernel for submission to the card */
67void R600CPFlushIndirect(ScrnInfoPtr pScrn)
68{
69    radeon_cs_flush_indirect(pScrn);
70}
71
72void R600IBDiscard(ScrnInfoPtr pScrn)
73{
74    radeon_ib_discard(pScrn);
75}
76
77void
78r600_wait_3d_idle_clean(ScrnInfoPtr pScrn)
79{
80    RADEONInfoPtr info = RADEONPTR(pScrn);
81
82    //flush caches, don't generate timestamp
83    BEGIN_BATCH(5);
84    PACK3(IT_EVENT_WRITE, 1);
85    E32(CACHE_FLUSH_AND_INV_EVENT);
86    // wait for 3D idle clean
87    EREG(WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
88						   WAIT_3D_IDLECLEAN_bit));
89    END_BATCH();
90}
91
92void
93r600_wait_3d_idle(ScrnInfoPtr pScrn)
94{
95    RADEONInfoPtr info = RADEONPTR(pScrn);
96
97    BEGIN_BATCH(3);
98    EREG(WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
99    END_BATCH();
100}
101
102void
103r600_start_3d(ScrnInfoPtr pScrn)
104{
105    RADEONInfoPtr info = RADEONPTR(pScrn);
106
107    if (info->ChipFamily < CHIP_FAMILY_RV770) {
108	BEGIN_BATCH(5);
109	PACK3(IT_START_3D_CMDBUF, 1);
110	E32(0);
111    } else
112	BEGIN_BATCH(3);
113
114    PACK3(IT_CONTEXT_CONTROL, 2);
115    E32(0x80000000);
116    E32(0x80000000);
117    END_BATCH();
118
119}
120
121/*
122 * Setup of functional groups
123 */
124
125// asic stack/thread/gpr limits - need to query the drm
126static void
127r600_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
128{
129    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
130    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
131    RADEONInfoPtr info = RADEONPTR(pScrn);
132
133    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
134	(info->ChipFamily == CHIP_FAMILY_RV620) ||
135	(info->ChipFamily == CHIP_FAMILY_RS780) ||
136	(info->ChipFamily == CHIP_FAMILY_RS880) ||
137	(info->ChipFamily == CHIP_FAMILY_RV710))
138	sq_config = 0;						// no VC
139    else
140	sq_config = VC_ENABLE_bit;
141
142    sq_config |= (DX9_CONSTS_bit |
143		  ALU_INST_PREFER_VECTOR_bit |
144		  (sq_conf->ps_prio << PS_PRIO_shift) |
145		  (sq_conf->vs_prio << VS_PRIO_shift) |
146		  (sq_conf->gs_prio << GS_PRIO_shift) |
147		  (sq_conf->es_prio << ES_PRIO_shift));
148
149    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
150			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
151			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
152    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
153			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
154
155    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
156			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
157			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
158			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
159
160    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
161				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
162
163    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
164				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
165
166    BEGIN_BATCH(8);
167    PACK0(SQ_CONFIG, 6);
168    E32(sq_config);
169    E32(sq_gpr_resource_mgmt_1);
170    E32(sq_gpr_resource_mgmt_2);
171    E32(sq_thread_resource_mgmt);
172    E32(sq_stack_resource_mgmt_1);
173    E32(sq_stack_resource_mgmt_2);
174    END_BATCH();
175}
176
177void r600_set_blend_color(ScrnInfoPtr pScrn, float *color)
178{
179    RADEONInfoPtr info = RADEONPTR(pScrn);
180
181    BEGIN_BATCH(2 + 4);
182    PACK0(CB_BLEND_RED, 4);
183    EFLOAT(color[0]); /* R */
184    EFLOAT(color[1]); /* G */
185    EFLOAT(color[2]); /* B */
186    EFLOAT(color[3]); /* A */
187    END_BATCH();
188}
189
190
191void
192r600_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
193{
194    uint32_t cb_color_info, cb_color_control;
195    unsigned pitch, slice, h, array_mode;
196    RADEONInfoPtr info = RADEONPTR(pScrn);
197
198
199    if (cb_conf->surface) {
200	switch (cb_conf->surface->level[0].mode) {
201	case RADEON_SURF_MODE_1D:
202		array_mode = 2;
203		break;
204	case RADEON_SURF_MODE_2D:
205		array_mode = 4;
206		break;
207	default:
208		array_mode = 0;
209		break;
210	}
211	pitch = (cb_conf->surface->level[0].nblk_x >> 3) - 1;
212	slice = ((cb_conf->surface->level[0].nblk_x * cb_conf->surface->level[0].nblk_y) / 64) - 1;
213    } else
214    {
215	array_mode = cb_conf->array_mode;
216	pitch = (cb_conf->w / 8) - 1;
217	h = RADEON_ALIGN(cb_conf->h, 8);
218	slice = ((cb_conf->w * h) / 64) - 1;
219    }
220
221    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
222		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
223		     (array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
224		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
225		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
226		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
227    if (cb_conf->read_size)
228	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
229    if (cb_conf->blend_clamp)
230	cb_color_info |= BLEND_CLAMP_bit;
231    if (cb_conf->clear_color)
232	cb_color_info |= CLEAR_COLOR_bit;
233    if (cb_conf->blend_bypass)
234	cb_color_info |= BLEND_BYPASS_bit;
235    if (cb_conf->blend_float32)
236	cb_color_info |= BLEND_FLOAT32_bit;
237    if (cb_conf->simple_float)
238	cb_color_info |= SIMPLE_FLOAT_bit;
239    if (cb_conf->round_mode)
240	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
241    if (cb_conf->tile_compact)
242	cb_color_info |= TILE_COMPACT_bit;
243    if (cb_conf->source_format)
244	cb_color_info |= SOURCE_FORMAT_bit;
245
246    BEGIN_BATCH(3 + 2);
247    EREG((CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
248    RELOC_BATCH(cb_conf->bo, 0, domain);
249    END_BATCH();
250
251    // rv6xx workaround
252    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
253        (info->ChipFamily < CHIP_FAMILY_RV770)) {
254        BEGIN_BATCH(2);
255        PACK3(IT_SURFACE_BASE_UPDATE, 1);
256        E32((2 << cb_conf->id));
257        END_BATCH();
258    }
259    /* Set CMASK & TILE buffer to the offset of color buffer as
260     * we don't use those this shouldn't cause any issue and we
261     * then have a valid cmd stream
262     */
263    BEGIN_BATCH(3 + 2);
264    EREG((CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
265    RELOC_BATCH(cb_conf->bo, 0, domain);
266    END_BATCH();
267    BEGIN_BATCH(3 + 2);
268    EREG((CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
269    RELOC_BATCH(cb_conf->bo, 0, domain);
270    END_BATCH();
271    BEGIN_BATCH(9);
272    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
273    EREG((CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
274						    (slice << SLICE_TILE_MAX_shift)));
275    EREG((CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
276						    (0    << SLICE_MAX_shift)));
277    EREG((CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
278						    (0    << FMASK_TILE_MAX_shift)));
279    END_BATCH();
280
281    BEGIN_BATCH(3 + 2);
282    EREG((CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
283    RELOC_BATCH(cb_conf->bo, 0, domain);
284    END_BATCH();
285
286    BEGIN_BATCH(9);
287    EREG(CB_TARGET_MASK,          (cb_conf->pmask << TARGET0_ENABLE_shift));
288    cb_color_control = R600_ROP[cb_conf->rop] |
289	(cb_conf->blend_enable << TARGET_BLEND_ENABLE_shift);
290    if (info->ChipFamily == CHIP_FAMILY_R600) {
291	/* no per-MRT blend on R600 */
292	EREG(CB_COLOR_CONTROL,    cb_color_control);
293	EREG(CB_BLEND_CONTROL,    cb_conf->blendcntl);
294    } else {
295	if (cb_conf->blend_enable)
296	    cb_color_control |= PER_MRT_BLEND_bit;
297	EREG(CB_COLOR_CONTROL,    cb_color_control);
298	EREG(CB_BLEND0_CONTROL,   cb_conf->blendcntl);
299    }
300    END_BATCH();
301}
302
303static void
304r600_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
305			 uint32_t size, uint64_t mc_addr,
306			 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
307{
308    RADEONInfoPtr info = RADEONPTR(pScrn);
309    uint32_t cp_coher_size;
310    if (size == 0xffffffff)
311	cp_coher_size = 0xffffffff;
312    else
313	cp_coher_size = ((size + 255) >> 8);
314
315    BEGIN_BATCH(5 + 2);
316    PACK3(IT_SURFACE_SYNC, 4);
317    E32(sync_type);
318    E32(cp_coher_size);
319    E32((mc_addr >> 8));
320    E32(10); /* poll interval */
321    RELOC_BATCH(bo, rdomains, wdomain);
322    END_BATCH();
323}
324
325/* inserts a wait for vline in the command stream */
326void
327r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
328			xf86CrtcPtr crtc, int start, int stop)
329{
330    RADEONInfoPtr  info = RADEONPTR(pScrn);
331    drmmode_crtc_private_ptr drmmode_crtc;
332
333    if (!crtc)
334        return;
335
336    if (!crtc->enabled)
337        return;
338
339    if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
340        return;
341
342    start = max(start, crtc->y);
343    stop = min(stop, crtc->y + crtc->mode.VDisplay);
344
345    if (start >= stop)
346        return;
347
348    drmmode_crtc = crtc->driver_private;
349
350    BEGIN_BATCH(11);
351    /* set the VLINE range */
352    EREG(AVIVO_D1MODE_VLINE_START_END, /* this is just a marker */
353	 (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
354	 (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
355
356    /* tell the CP to poll the VLINE state register */
357    PACK3(IT_WAIT_REG_MEM, 6);
358    E32(IT_WAIT_REG | IT_WAIT_EQ);
359    E32(IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS));
360    E32(0);
361    E32(0);                          // Ref value
362    E32(AVIVO_D1MODE_VLINE_STAT);    // Mask
363    E32(10);                         // Wait interval
364    /* add crtc reloc */
365    PACK3(IT_NOP, 1);
366    E32(drmmode_crtc->mode_crtc->crtc_id);
367    END_BATCH();
368}
369
370void
371r600_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp)
372{
373    RADEONInfoPtr info = RADEONPTR(pScrn);
374
375    BEGIN_BATCH(8);
376    /* Interpolator setup */
377    EREG(SPI_VS_OUT_CONFIG, (vs_export_count << VS_EXPORT_COUNT_shift));
378    PACK0(SPI_PS_IN_CONTROL_0, 3);
379    E32((num_interp << NUM_INTERP_shift));
380    E32(0);
381    E32(0);
382    END_BATCH();
383}
384
385void
386r600_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
387{
388    RADEONInfoPtr info = RADEONPTR(pScrn);
389    uint32_t sq_pgm_resources;
390
391    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
392			(fs_conf->stack_size << STACK_SIZE_shift));
393
394    if (fs_conf->dx10_clamp)
395	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
396
397    BEGIN_BATCH(3 + 2);
398    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
399    RELOC_BATCH(fs_conf->bo, domain, 0);
400    END_BATCH();
401
402    BEGIN_BATCH(6);
403    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
404    EREG(SQ_PGM_CF_OFFSET_FS, 0);
405    END_BATCH();
406}
407
408void
409r600_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
410{
411    RADEONInfoPtr info = RADEONPTR(pScrn);
412    uint32_t sq_pgm_resources;
413
414    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
415			(vs_conf->stack_size << STACK_SIZE_shift));
416
417    if (vs_conf->dx10_clamp)
418	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
419    if (vs_conf->fetch_cache_lines)
420	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
421    if (vs_conf->uncached_first_inst)
422	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
423
424    /* flush SQ cache */
425    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
426			     vs_conf->shader_size, vs_conf->shader_addr,
427			     vs_conf->bo, domain, 0);
428
429    BEGIN_BATCH(3 + 2);
430    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
431    RELOC_BATCH(vs_conf->bo, domain, 0);
432    END_BATCH();
433
434    BEGIN_BATCH(6);
435    EREG(SQ_PGM_RESOURCES_VS, sq_pgm_resources);
436    EREG(SQ_PGM_CF_OFFSET_VS, 0);
437    END_BATCH();
438}
439
440void
441r600_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
442{
443    RADEONInfoPtr info = RADEONPTR(pScrn);
444    uint32_t sq_pgm_resources;
445
446    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
447			(ps_conf->stack_size << STACK_SIZE_shift));
448
449    if (ps_conf->dx10_clamp)
450	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
451    if (ps_conf->fetch_cache_lines)
452	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
453    if (ps_conf->uncached_first_inst)
454	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
455    if (ps_conf->clamp_consts)
456	sq_pgm_resources |= CLAMP_CONSTS_bit;
457
458    /* flush SQ cache */
459    r600_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
460			     ps_conf->shader_size, ps_conf->shader_addr,
461			     ps_conf->bo, domain, 0);
462
463    BEGIN_BATCH(3 + 2);
464    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
465    RELOC_BATCH(ps_conf->bo, domain, 0);
466    END_BATCH();
467
468    BEGIN_BATCH(9);
469    EREG(SQ_PGM_RESOURCES_PS, sq_pgm_resources);
470    EREG(SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
471    EREG(SQ_PGM_CF_OFFSET_PS, 0);
472    END_BATCH();
473}
474
475void
476r600_set_alu_consts(ScrnInfoPtr pScrn, int offset, int count, float *const_buf)
477{
478    RADEONInfoPtr info = RADEONPTR(pScrn);
479    int i;
480    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
481
482    BEGIN_BATCH(2 + countreg);
483    PACK0(SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
484    for (i = 0; i < countreg; i++)
485	EFLOAT(const_buf[i]);
486    END_BATCH();
487}
488
489void
490r600_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
491{
492    RADEONInfoPtr info = RADEONPTR(pScrn);
493    /* bool register order is: ps, vs, gs; one register each
494     * 1 bits per bool; 32 bools each for ps, vs, gs.
495     */
496    BEGIN_BATCH(3);
497    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
498    END_BATCH();
499}
500
501static void
502r600_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
503{
504    RADEONInfoPtr info = RADEONPTR(pScrn);
505    struct radeon_accel_state *accel_state = info->accel_state;
506    uint32_t sq_vtx_constant_word2;
507
508    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
509			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
510			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
511			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
512			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
513    if (res->clamp_x)
514	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
515
516    if (res->format_comp_all)
517	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
518
519    if (res->srf_mode_all)
520	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
521
522    /* flush vertex cache */
523    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
524	(info->ChipFamily == CHIP_FAMILY_RV620) ||
525	(info->ChipFamily == CHIP_FAMILY_RS780) ||
526	(info->ChipFamily == CHIP_FAMILY_RS880) ||
527	(info->ChipFamily == CHIP_FAMILY_RV710))
528	r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
529				 accel_state->vbo.vb_offset, 0,
530				 res->bo,
531				 domain, 0);
532    else
533	r600_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
534				 accel_state->vbo.vb_offset, 0,
535				 res->bo,
536				 domain, 0);
537
538    BEGIN_BATCH(9 + 2);
539    PACK0(SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
540    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
541    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
542    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
543    E32(res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
544    E32(0);							// 4: n/a
545    E32(0);							// 5: n/a
546    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
547    RELOC_BATCH(res->bo, domain, 0);
548    END_BATCH();
549}
550
551void
552r600_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
553{
554    RADEONInfoPtr info = RADEONPTR(pScrn);
555    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
556    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
557    uint32_t array_mode, pitch;
558
559    if (tex_res->surface) {
560	switch (tex_res->surface->level[0].mode) {
561	case RADEON_SURF_MODE_1D:
562		array_mode = 2;
563		break;
564	case RADEON_SURF_MODE_2D:
565		array_mode = 4;
566		break;
567	default:
568		array_mode = 0;
569		break;
570	}
571	pitch = tex_res->surface->level[0].nblk_x >> 3;
572    } else
573    {
574	array_mode = tex_res->tile_mode;
575	pitch = (tex_res->pitch + 7) >> 3;
576    }
577
578    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
579		     (array_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
580
581    if (tex_res->w)
582	sq_tex_resource_word0 |= (((pitch - 1) << PITCH_shift) |
583				  ((tex_res->w - 1) << TEX_WIDTH_shift));
584
585    if (tex_res->tile_type)
586	sq_tex_resource_word0 |= TILE_TYPE_bit;
587
588    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
589
590    if (tex_res->h)
591	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
592    if (tex_res->depth)
593	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
594
595    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
596			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
597			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
598			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
599			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
600			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
601			     (tex_res->request_size << REQUEST_SIZE_shift) |
602			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
603			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
604			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
605			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
606			     (tex_res->base_level << BASE_LEVEL_shift));
607
608    if (tex_res->srf_mode_all)
609	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
610    if (tex_res->force_degamma)
611	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
612
613    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
614			     (tex_res->base_array << BASE_ARRAY_shift) |
615			     (tex_res->last_array << LAST_ARRAY_shift));
616
617    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
618			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
619			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
620
621    if (tex_res->interlaced)
622	sq_tex_resource_word6 |= INTERLACED_bit;
623
624    /* flush texture cache */
625    r600_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
626			     tex_res->size, tex_res->base,
627			     tex_res->bo, domain, 0);
628
629    BEGIN_BATCH(9 + 4);
630    PACK0(SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
631    E32(sq_tex_resource_word0);
632    E32(sq_tex_resource_word1);
633    E32(((tex_res->base) >> 8));
634    E32(((tex_res->mip_base) >> 8));
635    E32(sq_tex_resource_word4);
636    E32(sq_tex_resource_word5);
637    E32(sq_tex_resource_word6);
638    RELOC_BATCH(tex_res->bo, domain, 0);
639    RELOC_BATCH(tex_res->mip_bo, domain, 0);
640    END_BATCH();
641}
642
643void
644r600_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
645{
646    RADEONInfoPtr info = RADEONPTR(pScrn);
647    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
648
649    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
650			    (s->clamp_y       << CLAMP_Y_shift)					|
651			    (s->clamp_z       << CLAMP_Z_shift)					|
652			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
653			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
654			    (s->z_filter      << Z_FILTER_shift)	|
655			    (s->mip_filter    << MIP_FILTER_shift)				|
656			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
657			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
658			    (s->chroma_key    << CHROMA_KEY_shift));
659    if (s->point_sampling_clamp)
660	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
661    if (s->tex_array_override)
662	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
663    if (s->lod_uses_minor_axis)
664	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
665
666    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
667			    (s->max_lod       << MAX_LOD_shift)					|
668			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
669
670    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
671			    (s->perf_mip      << PERF_MIP_shift)	|
672			    (s->perf_z        << PERF_Z_shift));
673    if (s->mc_coord_truncate)
674	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
675    if (s->force_degamma)
676	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
677    if (s->high_precision_filter)
678	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
679    if (s->fetch_4)
680	sq_tex_sampler_word2 |= FETCH_4_bit;
681    if (s->sample_is_pcf)
682	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
683    if (s->type)
684	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
685
686    BEGIN_BATCH(5);
687    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
688    E32(sq_tex_sampler_word0);
689    E32(sq_tex_sampler_word1);
690    E32(sq_tex_sampler_word2);
691    END_BATCH();
692}
693
694//XXX deal with clip offsets in clip setup
695void
696r600_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
697{
698    RADEONInfoPtr info = RADEONPTR(pScrn);
699
700    BEGIN_BATCH(4);
701    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
702    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
703	     (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
704    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
705	     (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
706    END_BATCH();
707}
708
709void
710r600_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
711{
712    RADEONInfoPtr info = RADEONPTR(pScrn);
713
714    BEGIN_BATCH(4);
715    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
716    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
717	     (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
718	     WINDOW_OFFSET_DISABLE_bit));
719    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
720	     (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
721    END_BATCH();
722}
723
724void
725r600_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
726{
727    RADEONInfoPtr info = RADEONPTR(pScrn);
728
729    BEGIN_BATCH(4);
730    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
731    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
732	     (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
733	     WINDOW_OFFSET_DISABLE_bit));
734    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
735	     (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
736    END_BATCH();
737}
738
739void
740r600_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
741{
742    RADEONInfoPtr info = RADEONPTR(pScrn);
743
744    BEGIN_BATCH(4);
745    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
746    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
747	     (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
748	     WINDOW_OFFSET_DISABLE_bit));
749    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
750	      (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
751    END_BATCH();
752}
753
754void
755r600_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
756{
757    RADEONInfoPtr info = RADEONPTR(pScrn);
758
759    BEGIN_BATCH(4);
760    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
761    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
762	     (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
763    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
764	     (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
765    END_BATCH();
766}
767
768/*
769 * Setup of default state
770 */
771
772void
773r600_set_default_state(ScrnInfoPtr pScrn)
774{
775    tex_resource_t tex_res;
776    shader_config_t fs_conf;
777    sq_config_t sq_conf;
778    int i;
779    RADEONInfoPtr info = RADEONPTR(pScrn);
780    struct radeon_accel_state *accel_state = info->accel_state;
781
782    if (accel_state->XInited3D)
783	return;
784
785    memset(&tex_res, 0, sizeof(tex_resource_t));
786    memset(&fs_conf, 0, sizeof(shader_config_t));
787
788    accel_state->XInited3D = TRUE;
789
790    r600_start_3d(pScrn);
791
792    // SQ
793    sq_conf.ps_prio = 0;
794    sq_conf.vs_prio = 1;
795    sq_conf.gs_prio = 2;
796    sq_conf.es_prio = 3;
797    // need to set stack/thread/gpr limits based on the asic
798    // for now just set them low enough so any card will work
799    // see r600_cp.c in the drm
800    switch (info->ChipFamily) {
801    case CHIP_FAMILY_R600:
802	sq_conf.num_ps_gprs = 192;
803	sq_conf.num_vs_gprs = 56;
804	sq_conf.num_temp_gprs = 4;
805	sq_conf.num_gs_gprs = 0;
806	sq_conf.num_es_gprs = 0;
807	sq_conf.num_ps_threads = 136;
808	sq_conf.num_vs_threads = 48;
809	sq_conf.num_gs_threads = 4;
810	sq_conf.num_es_threads = 4;
811	sq_conf.num_ps_stack_entries = 128;
812	sq_conf.num_vs_stack_entries = 128;
813	sq_conf.num_gs_stack_entries = 0;
814	sq_conf.num_es_stack_entries = 0;
815	break;
816    case CHIP_FAMILY_RV630:
817    case CHIP_FAMILY_RV635:
818	sq_conf.num_ps_gprs = 84;
819	sq_conf.num_vs_gprs = 36;
820	sq_conf.num_temp_gprs = 4;
821	sq_conf.num_gs_gprs = 0;
822	sq_conf.num_es_gprs = 0;
823	sq_conf.num_ps_threads = 144;
824	sq_conf.num_vs_threads = 40;
825	sq_conf.num_gs_threads = 4;
826	sq_conf.num_es_threads = 4;
827	sq_conf.num_ps_stack_entries = 40;
828	sq_conf.num_vs_stack_entries = 40;
829	sq_conf.num_gs_stack_entries = 32;
830	sq_conf.num_es_stack_entries = 16;
831	break;
832    case CHIP_FAMILY_RV610:
833    case CHIP_FAMILY_RV620:
834    case CHIP_FAMILY_RS780:
835    case CHIP_FAMILY_RS880:
836    default:
837	sq_conf.num_ps_gprs = 84;
838	sq_conf.num_vs_gprs = 36;
839	sq_conf.num_temp_gprs = 4;
840	sq_conf.num_gs_gprs = 0;
841	sq_conf.num_es_gprs = 0;
842	sq_conf.num_ps_threads = 136;
843	sq_conf.num_vs_threads = 48;
844	sq_conf.num_gs_threads = 4;
845	sq_conf.num_es_threads = 4;
846	sq_conf.num_ps_stack_entries = 40;
847	sq_conf.num_vs_stack_entries = 40;
848	sq_conf.num_gs_stack_entries = 32;
849	sq_conf.num_es_stack_entries = 16;
850	break;
851    case CHIP_FAMILY_RV670:
852	sq_conf.num_ps_gprs = 144;
853	sq_conf.num_vs_gprs = 40;
854	sq_conf.num_temp_gprs = 4;
855	sq_conf.num_gs_gprs = 0;
856	sq_conf.num_es_gprs = 0;
857	sq_conf.num_ps_threads = 136;
858	sq_conf.num_vs_threads = 48;
859	sq_conf.num_gs_threads = 4;
860	sq_conf.num_es_threads = 4;
861	sq_conf.num_ps_stack_entries = 40;
862	sq_conf.num_vs_stack_entries = 40;
863	sq_conf.num_gs_stack_entries = 32;
864	sq_conf.num_es_stack_entries = 16;
865	break;
866    case CHIP_FAMILY_RV770:
867	sq_conf.num_ps_gprs = 192;
868	sq_conf.num_vs_gprs = 56;
869	sq_conf.num_temp_gprs = 4;
870	sq_conf.num_gs_gprs = 0;
871	sq_conf.num_es_gprs = 0;
872	sq_conf.num_ps_threads = 188;
873	sq_conf.num_vs_threads = 60;
874	sq_conf.num_gs_threads = 0;
875	sq_conf.num_es_threads = 0;
876	sq_conf.num_ps_stack_entries = 256;
877	sq_conf.num_vs_stack_entries = 256;
878	sq_conf.num_gs_stack_entries = 0;
879	sq_conf.num_es_stack_entries = 0;
880	break;
881    case CHIP_FAMILY_RV730:
882    case CHIP_FAMILY_RV740:
883	sq_conf.num_ps_gprs = 84;
884	sq_conf.num_vs_gprs = 36;
885	sq_conf.num_temp_gprs = 4;
886	sq_conf.num_gs_gprs = 0;
887	sq_conf.num_es_gprs = 0;
888	sq_conf.num_ps_threads = 188;
889	sq_conf.num_vs_threads = 60;
890	sq_conf.num_gs_threads = 0;
891	sq_conf.num_es_threads = 0;
892	sq_conf.num_ps_stack_entries = 128;
893	sq_conf.num_vs_stack_entries = 128;
894	sq_conf.num_gs_stack_entries = 0;
895	sq_conf.num_es_stack_entries = 0;
896	break;
897    case CHIP_FAMILY_RV710:
898	sq_conf.num_ps_gprs = 192;
899	sq_conf.num_vs_gprs = 56;
900	sq_conf.num_temp_gprs = 4;
901	sq_conf.num_gs_gprs = 0;
902	sq_conf.num_es_gprs = 0;
903	sq_conf.num_ps_threads = 144;
904	sq_conf.num_vs_threads = 48;
905	sq_conf.num_gs_threads = 0;
906	sq_conf.num_es_threads = 0;
907	sq_conf.num_ps_stack_entries = 128;
908	sq_conf.num_vs_stack_entries = 128;
909	sq_conf.num_gs_stack_entries = 0;
910	sq_conf.num_es_stack_entries = 0;
911	break;
912    }
913
914    r600_sq_setup(pScrn, &sq_conf);
915
916    /* set fake reloc for unused depth */
917    BEGIN_BATCH(3 + 2);
918    EREG(DB_DEPTH_INFO, 0);
919    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
920    END_BATCH();
921
922    BEGIN_BATCH(80);
923    if (info->ChipFamily < CHIP_FAMILY_RV770) {
924	EREG(TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
925			       (28 << TD_FIFO_CREDIT_shift)));
926	EREG(VC_ENHANCE, 0);
927	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
928	EREG(DB_DEBUG, 0x82000000); /* ? */
929	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
930				 (16 << DEPTH_FLUSH_shift) |
931				 (0 << FORCE_SUMMARIZE_shift) |
932				 (4 << DEPTH_PENDING_FREE_shift) |
933				 (16 << DEPTH_CACHELINE_FREE_shift) |
934				 0));
935    } else {
936	EREG(TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
937			       (28 << TD_FIFO_CREDIT_shift)));
938	EREG(VC_ENHANCE, 0);
939	EREG(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
940	EREG(DB_DEBUG, 0);
941	EREG(DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
942				 (16 << DEPTH_FLUSH_shift) |
943				 (0 << FORCE_SUMMARIZE_shift) |
944				 (4 << DEPTH_PENDING_FREE_shift) |
945				 (4 << DEPTH_CACHELINE_FREE_shift) |
946				 0));
947    }
948
949    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
950    E32(0);
951    E32(0);
952
953    PACK0(SQ_ESGS_RING_ITEMSIZE, 9);
954    E32(0); // SQ_ESGS_RING_ITEMSIZE
955    E32(0); // SQ_GSVS_RING_ITEMSIZE
956    E32(0); // SQ_ESTMP_RING_ITEMSIZE
957    E32(0); // SQ_GSTMP_RING_ITEMSIZE
958    E32(0); // SQ_VSTMP_RING_ITEMSIZE
959    E32(0); // SQ_PSTMP_RING_ITEMSIZE
960    E32(0); // SQ_FBUF_RING_ITEMSIZE
961    E32(0); // SQ_REDUC_RING_ITEMSIZE
962    E32(0); // SQ_GS_VERT_ITEMSIZE
963
964    // DB
965    EREG(DB_DEPTH_CONTROL,                    0);
966    PACK0(DB_RENDER_CONTROL, 2);
967    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
968    if (info->ChipFamily < CHIP_FAMILY_RV770)
969	E32(FORCE_SHADER_Z_ORDER_bit);
970    else
971	E32(0);
972    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
973						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
974						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
975						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
976    EREG(DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
977				 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
978
979    PACK0(DB_STENCIL_CLEAR, 2);
980    E32(0); // DB_STENCIL_CLEAR
981    E32(0); // DB_DEPTH_CLEAR
982
983    PACK0(DB_STENCILREFMASK, 3);
984    E32(0); // DB_STENCILREFMASK
985    E32(0); // DB_STENCILREFMASK_BF
986    E32(0); // SX_ALPHA_REF
987
988    PACK0(CB_CLRCMP_CONTROL, 4);
989    E32(1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
990    E32(0);							// CB_CLRCMP_SRC
991    E32(0);							// CB_CLRCMP_DST
992    E32(0);							// CB_CLRCMP_MSK
993
994    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
995    EREG(R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
996
997    PACK0(SX_ALPHA_TEST_CONTROL, 5);
998    E32(0); // SX_ALPHA_TEST_CONTROL
999    E32(0x00000000); // CB_BLEND_RED
1000    E32(0x00000000); // CB_BLEND_GREEN
1001    E32(0x00000000); // CB_BLEND_BLUE
1002    E32(0x00000000); // CB_BLEND_ALPHA
1003
1004    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
1005						   (0 << WINDOW_Y_OFFSET_shift)));
1006
1007    if (info->ChipFamily < CHIP_FAMILY_RV770)
1008	EREG(R7xx_PA_SC_EDGERULE,             0x00000000);
1009    else
1010	EREG(R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
1011
1012    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
1013
1014    END_BATCH();
1015
1016    /* clip boolean is set to always visible -> doesn't matter */
1017    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
1018	r600_set_clip_rect(pScrn, i, 0, 0, 8192, 8192);
1019
1020    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
1021	r600_set_vport_scissor(pScrn, i, 0, 0, 8192, 8192);
1022
1023    BEGIN_BATCH(49);
1024    PACK0(PA_SC_MPASS_PS_CNTL, 2);
1025    E32(0);
1026    if (info->ChipFamily < CHIP_FAMILY_RV770)
1027	E32((WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
1028    else
1029	E32((FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
1030		 0x00500000)); /* ? */
1031
1032    PACK0(PA_SC_LINE_CNTL, 9);
1033    E32(0); // PA_SC_LINE_CNTL
1034    E32(0); // PA_SC_AA_CONFIG
1035    E32(((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | PIX_CENTER_bit | // PA_SU_VTX_CNTL
1036	     (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
1037    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
1038    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
1039    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
1040    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
1041    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_MCTX
1042    E32(0);                                                 // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
1043
1044    EREG(PA_SC_AA_MASK,                       0xFFFFFFFF);
1045
1046    PACK0(PA_CL_CLIP_CNTL, 5);
1047    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
1048    E32(FACE_bit);         // PA_SU_SC_MODE_CNTL
1049    E32(VTX_XY_FMT_bit);   // PA_CL_VTE_CNTL
1050    E32(0);                // PA_CL_VS_OUT_CNTL
1051    E32(0);                // PA_CL_NANINF_CNTL
1052
1053    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
1054    E32(0); // PA_SU_POLY_OFFSET_DB_FMT_CNTL
1055    E32(0); // PA_SU_POLY_OFFSET_CLAMP
1056    E32(0); // PA_SU_POLY_OFFSET_FRONT_SCALE
1057    E32(0); // PA_SU_POLY_OFFSET_FRONT_OFFSET
1058    E32(0); // PA_SU_POLY_OFFSET_BACK_SCALE
1059    E32(0); // PA_SU_POLY_OFFSET_BACK_OFFSET
1060
1061    // SPI
1062    if (info->ChipFamily < CHIP_FAMILY_RV770)
1063	EREG(R7xx_SPI_THREAD_GROUPING,        0);
1064    else
1065	EREG(R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
1066
1067    /* default Interpolator setup */
1068    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1069			       (1 << SEMANTIC_1_shift)));
1070    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1071    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1072    E32(((0    << SEMANTIC_shift)	|
1073	     (0x01 << DEFAULT_VAL_shift)	|
1074	     SEL_CENTROID_bit));
1075    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1076    E32(((1    << SEMANTIC_shift)	|
1077	     (0x01 << DEFAULT_VAL_shift)	|
1078	     SEL_CENTROID_bit));
1079
1080    PACK0(SPI_INPUT_Z, 4);
1081    E32(0); // SPI_INPUT_Z
1082    E32(0); // SPI_FOG_CNTL
1083    E32(0); // SPI_FOG_FUNC_SCALE
1084    E32(0); // SPI_FOG_FUNC_BIAS
1085
1086    END_BATCH();
1087
1088    // clear FS
1089    fs_conf.bo = accel_state->shaders_bo;
1090    r600_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
1091
1092    // VGT
1093    BEGIN_BATCH(46);
1094    PACK0(VGT_MAX_VTX_INDX, 4);
1095    E32(0xffffff); // VGT_MAX_VTX_INDX
1096    E32(0); // VGT_MIN_VTX_INDX
1097    E32(0); // VGT_INDX_OFFSET
1098    E32(0); // VGT_MULTI_PRIM_IB_RESET_INDX
1099
1100    EREG(VGT_PRIMITIVEID_EN,                  0);
1101    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
1102
1103    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
1104    E32(0); // VGT_INSTANCE_STEP_RATE_0
1105    E32(0); // VGT_INSTANCE_STEP_RATE_1
1106
1107    PACK0(PA_SU_POINT_SIZE, 17);
1108    E32(0); // PA_SU_POINT_SIZE
1109    E32(0); // PA_SU_POINT_MINMAX
1110    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
1111    E32(0); // PA_SC_LINE_STIPPLE
1112    E32(0); // VGT_OUTPUT_PATH_CNTL
1113    E32(0); // VGT_HOS_CNTL
1114    E32(0); // VGT_HOS_MAX_TESS_LEVEL
1115    E32(0); // VGT_HOS_MIN_TESS_LEVEL
1116    E32(0); // VGT_HOS_REUSE_DEPTH
1117    E32(0); // VGT_GROUP_PRIM_TYPE
1118    E32(0); // VGT_GROUP_FIRST_DECR
1119    E32(0); // VGT_GROUP_DECR
1120    E32(0); // VGT_GROUP_VECT_0_CNTL
1121    E32(0); // VGT_GROUP_VECT_1_CNTL
1122    E32(0); // VGT_GROUP_VECT_0_FMT_CNTL
1123    E32(0); // VGT_GROUP_VECT_1_FMT_CNTL
1124    E32(0); // VGT_GS_MODE
1125
1126    PACK0(VGT_STRMOUT_EN, 3);
1127    E32(0); // VGT_STRMOUT_EN
1128    E32(0); // VGT_REUSE_OFF
1129    E32(0); // VGT_VTX_CNT_EN
1130
1131    EREG(VGT_STRMOUT_BUFFER_EN,               0);
1132    EREG(SX_MISC,                             0);
1133    END_BATCH();
1134}
1135
1136
1137/*
1138 * Commands
1139 */
1140
1141void
1142r600_draw_immd(ScrnInfoPtr pScrn, draw_config_t *draw_conf, uint32_t *indices)
1143{
1144    RADEONInfoPtr info = RADEONPTR(pScrn);
1145    uint32_t i, count;
1146
1147    // calculate num of packets
1148    count = 2;
1149    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1150	count += (draw_conf->num_indices + 1) / 2;
1151    else
1152	count += draw_conf->num_indices;
1153
1154    BEGIN_BATCH(8 + count);
1155    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1156    PACK3(IT_INDEX_TYPE, 1);
1157#if X_BYTE_ORDER == X_BIG_ENDIAN
1158    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1159#else
1160    E32(draw_conf->index_type);
1161#endif
1162    PACK3(IT_NUM_INSTANCES, 1);
1163    E32(draw_conf->num_instances);
1164
1165    PACK3(IT_DRAW_INDEX_IMMD, count);
1166    E32(draw_conf->num_indices);
1167    E32(draw_conf->vgt_draw_initiator);
1168
1169    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1170	for (i = 0; i < draw_conf->num_indices; i += 2) {
1171	    if ((i + 1) == draw_conf->num_indices)
1172		E32(indices[i]);
1173	    else
1174		E32((indices[i] | (indices[i + 1] << 16)));
1175	}
1176    } else {
1177	for (i = 0; i < draw_conf->num_indices; i++)
1178	    E32(indices[i]);
1179    }
1180    END_BATCH();
1181}
1182
1183void
1184r600_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
1185{
1186    RADEONInfoPtr info = RADEONPTR(pScrn);
1187
1188    BEGIN_BATCH(10);
1189    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1190    PACK3(IT_INDEX_TYPE, 1);
1191#if X_BYTE_ORDER == X_BIG_ENDIAN
1192    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
1193#else
1194    E32(draw_conf->index_type);
1195#endif
1196    PACK3(IT_NUM_INSTANCES, 1);
1197    E32(draw_conf->num_instances);
1198    PACK3(IT_DRAW_INDEX_AUTO, 2);
1199    E32(draw_conf->num_indices);
1200    E32(draw_conf->vgt_draw_initiator);
1201    END_BATCH();
1202}
1203
1204void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
1205{
1206    RADEONInfoPtr info = RADEONPTR(pScrn);
1207    struct radeon_accel_state *accel_state = info->accel_state;
1208    draw_config_t   draw_conf;
1209    vtx_resource_t  vtx_res;
1210
1211    if (accel_state->vbo.vb_start_op == -1)
1212	return;
1213
1214    CLEAR (draw_conf);
1215    CLEAR (vtx_res);
1216
1217    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
1218        R600IBDiscard(pScrn);
1219	return;
1220    }
1221
1222    /* Vertex buffer setup */
1223    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
1224    vtx_res.id              = SQ_VTX_RESOURCE_vs;
1225    vtx_res.vtx_size_dw     = vtx_size / 4;
1226    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
1227    vtx_res.mem_req_size    = 1;
1228    vtx_res.vb_addr         = accel_state->vbo.vb_start_op;
1229    vtx_res.bo              = accel_state->vbo.vb_bo;
1230#if X_BYTE_ORDER == X_BIG_ENDIAN
1231    vtx_res.endian          = SQ_ENDIAN_8IN32;
1232#endif
1233    r600_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
1234
1235    /* Draw */
1236    draw_conf.prim_type          = DI_PT_RECTLIST;
1237    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1238    draw_conf.num_instances      = 1;
1239    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1240    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1241
1242    r600_draw_auto(pScrn, &draw_conf);
1243
1244    /* XXX drm should handle this in fence submit */
1245    r600_wait_3d_idle_clean(pScrn);
1246
1247    /* sync dst surface */
1248    r600_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1249			     accel_state->dst_size, 0,
1250			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
1251
1252    accel_state->vbo.vb_start_op = -1;
1253    accel_state->ib_reset_op = 0;
1254
1255}
1256
1257