Home | History | Annotate | Line # | Download | only in shared-core
      1 /* r600_cp.c -- CP support for Radeon -*- linux-c -*- */
      2 /*
      3  * Copyright 2008 Advanced Micro Devices, Inc.
      4  * Copyright 2008 Red Hat Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     23  * DEALINGS IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *     Dave Airlie <airlied (at) redhat.com>
     27  *     Alex Deucher <alexander.deucher (at) amd.com>
     28  */
     29 
     30 #include "drmP.h"
     31 #include "drm.h"
     32 #include "radeon_drm.h"
     33 #include "radeon_drv.h"
     34 #include "r300_reg.h"
     35 
     36 #define PFP_UCODE_SIZE 576
     37 #define PM4_UCODE_SIZE 1792
     38 #define R700_PFP_UCODE_SIZE 848
     39 #define R700_PM4_UCODE_SIZE 1360
     40 #define EVERGREEN_PFP_UCODE_SIZE 1120
     41 #define EVERGREEN_PM4_UCODE_SIZE 1376
     42 
     43 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
     44 # define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
     45 
     46 #define R600_PTE_VALID     (1 << 0)
     47 #define R600_PTE_SYSTEM    (1 << 1)
     48 #define R600_PTE_SNOOPED   (1 << 2)
     49 #define R600_PTE_READABLE  (1 << 5)
     50 #define R600_PTE_WRITEABLE (1 << 6)
     51 
     52 /* MAX values used for gfx init */
     53 #define R6XX_MAX_SH_GPRS           256
     54 #define R6XX_MAX_TEMP_GPRS         16
     55 #define R6XX_MAX_SH_THREADS        256
     56 #define R6XX_MAX_SH_STACK_ENTRIES  4096
     57 #define R6XX_MAX_BACKENDS          8
     58 #define R6XX_MAX_BACKENDS_MASK     0xff
     59 #define R6XX_MAX_SIMDS             8
     60 #define R6XX_MAX_SIMDS_MASK        0xff
     61 #define R6XX_MAX_PIPES             8
     62 #define R6XX_MAX_PIPES_MASK        0xff
     63 
     64 #define R7XX_MAX_SH_GPRS           256
     65 #define R7XX_MAX_TEMP_GPRS         16
     66 #define R7XX_MAX_SH_THREADS        256
     67 #define R7XX_MAX_SH_STACK_ENTRIES  4096
     68 #define R7XX_MAX_BACKENDS          8
     69 #define R7XX_MAX_BACKENDS_MASK     0xff
     70 #define R7XX_MAX_SIMDS             16
     71 #define R7XX_MAX_SIMDS_MASK        0xffff
     72 #define R7XX_MAX_PIPES             8
     73 #define R7XX_MAX_PIPES_MASK        0xff
     74 
     75 static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries)
     76 {
     77 	int i;
     78 
     79 	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
     80 
     81 	for (i = 0; i < dev_priv->usec_timeout; i++) {
     82 		int slots;
     83 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
     84 			slots = (RADEON_READ(R600_GRBM_STATUS)
     85 				 & R700_CMDFIFO_AVAIL_MASK);
     86 		else
     87 			slots = (RADEON_READ(R600_GRBM_STATUS)
     88 				 & R600_CMDFIFO_AVAIL_MASK);
     89 		if (slots >= entries)
     90 			return 0;
     91 		DRM_UDELAY(1);
     92 	}
     93 	DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
     94 		 RADEON_READ(R600_GRBM_STATUS),
     95 		 RADEON_READ(R600_GRBM_STATUS2));
     96 
     97 	return -EBUSY;
     98 }
     99 
    100 static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv)
    101 {
    102 	int i, ret;
    103 
    104 	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
    105 
    106 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)
    107 		ret = r600_do_wait_for_fifo(dev_priv, 8);
    108 	else
    109 		ret = r600_do_wait_for_fifo(dev_priv, 16);
    110 	if (ret)
    111 		return ret;
    112 	for (i = 0; i < dev_priv->usec_timeout; i++) {
    113 		if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
    114 			return 0;
    115 		DRM_UDELAY(1);
    116 	}
    117 	DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
    118 		 RADEON_READ(R600_GRBM_STATUS),
    119 		 RADEON_READ(R600_GRBM_STATUS2));
    120 
    121 	return -EBUSY;
    122 }
    123 
/*
 * Tear down the r600 PCIGART page table state.
 *
 * On Linux this unmaps the scatter/gather pages that
 * r600_page_table_init() mapped with pci_map_page(); on other
 * platforms only the table's recorded bus address is cleared.
 */
void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info)
{
#ifdef __linux__
	struct drm_sg_mem *entry = dev->sg;
	int max_pages;
	int pages;
	int i;

	/* No scatter/gather area was ever attached - nothing to unmap. */
	if (!entry)
		return;

#endif
	if (gart_info->bus_addr) {
#ifdef __linux__
		/* Each PTE is 64 bits; never walk past the end of the table. */
		max_pages = (gart_info->table_size / sizeof(u64));
		pages = (entry->pages <= max_pages)
		  ? entry->pages : max_pages;

		for (i = 0; i < pages; i++) {
			/* A zero busaddr marks the first slot that was
			 * never mapped (init bailed out early). */
			if (!entry->busaddr[i])
				break;
			pci_unmap_page(dev->pdev, entry->busaddr[i],
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		}
#endif
		if (gart_info->gart_table_location == DRM_ATI_GART_MAIN)
			gart_info->bus_addr = 0;
	}
}
    153 
/* R600 has page table setup */
/*
 * Build the flat GART page table for r600: map every scatter/gather
 * page (Linux only) and fill in one 64-bit PTE per 4K GART page.
 *
 * Returns 1 on success, 0 if a page could not be mapped (in which case
 * any partial mappings are undone via r600_page_table_cleanup()).
 */
int r600_page_table_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info;
	struct drm_sg_mem *entry = dev->sg;
	int ret = 0;
	int i, j;
	int max_pages, pages;
	u64 *pci_gart, page_base;
	dma_addr_t entry_addr;

	/* okay page table is available - lets rock */

	/* PTEs are 64-bits */
	pci_gart = (u64 *)gart_info->addr;

	/* Clamp to whichever is smaller: the SG area or the table. */
	max_pages = (gart_info->table_size / sizeof(u64));
	pages = (entry->pages <= max_pages) ? entry->pages : max_pages;

	memset(pci_gart, 0, max_pages * sizeof(u64));

	for (i = 0; i < pages; i++) {
#ifdef __linux__
		entry->busaddr[i] = pci_map_page(dev->pdev,
						 entry->pagelist[i], 0,
						 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		if (entry->busaddr[i] == 0) {
			DRM_ERROR("unable to map PCIGART pages!\n");
			r600_page_table_cleanup(dev, gart_info);
			goto done;
		}
#endif
		entry_addr = entry->busaddr[i];
		/* A CPU page may span several 4K GART pages; emit one PTE
		 * for each GART-sized chunk. */
		for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) {
			page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK;
			page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
			page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE;

			*pci_gart = page_base;

			/* Trace every 128th page to keep the log readable. */
			if ((i % 128) == 0)
				DRM_DEBUG("page entry %d: 0x%016llx\n",
				    i, (unsigned long long)page_base);
			pci_gart++;
			entry_addr += ATI_PCIGART_PAGE_SIZE;
		}
	}
	ret = 1;
#ifdef __linux__
done:
#endif
	return ret;
}
    208 
    209 static void r600_vm_flush_gart_range(struct drm_device *dev)
    210 {
    211 	drm_radeon_private_t *dev_priv = dev->dev_private;
    212 	u32 resp, countdown = 1000;
    213 	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12);
    214 	RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
    215 	RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2);
    216 
    217 	do {
    218 		resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE);
    219 		countdown--;
    220 		DRM_UDELAY(1);
    221 	} while (((resp & 0xf0) == 0) && countdown );
    222 }
    223 
/*
 * Program the r600 VM/MC to translate the GART aperture through the
 * flat page table built by r600_page_table_init().
 *
 * Sets the system aperture, configures every MC client's L1 TLB, sets
 * up the VM L2 cache, enables VM context 0 in flat (single-level)
 * page-table mode, disables contexts 1-7, points context 0 at the
 * page table, and finally flushes the GART range.
 */
static void r600_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_rd_a;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* setup MC RD a */
	mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS |
		R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) |
		R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY;

	/* The same base L1 TLB setting is applied to every MC client;
	 * a few clients below get extra flags OR'ed in. */
	RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING);
	RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/);

	RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a);
	RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a);

	RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE);
	RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a);

	/* VM L2 cache: enabled, fragment processing, LRU write allocation. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R600_VM_L2_CNTL3_BANK_SELECT_0(0) |
	              R600_VM_L2_CNTL3_BANK_SELECT_1(1) |
	              R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 enabled, flat (single level) page table. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Point context 0 at the page table; addresses are in 4K pages. */
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
    288 
/* load r600 microcode */
/*
 * Load the CP microcode (PFP + ME images) for an r6xx-family chip.
 *
 * Picks the firmware base name from the chip family, loads the two
 * images from disk, stops and soft-resets the CP, then uploads the
 * images into CP RAM.  On unknown families, or if either image fails
 * to load, the function returns without touching the hardware.
 */
static void r600_cp_load_microcode(drm_radeon_private_t * dev_priv)
{
	const char *chip_name;
	u32 (*me)[3];		/* ME image is consumed as 3-dword records */
	u32 *pfp;
	size_t pfp_size, me_size;
	int i, error;

	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_R600:
		chip_name = "R600";
		break;
	case CHIP_RV610:
		chip_name = "RV610";
		break;
	case CHIP_RV630:
		chip_name = "RV630";
		break;
	case CHIP_RV620:
		chip_name = "RV620";
		break;
	case CHIP_RV635:
		chip_name = "RV635";
		break;
	case CHIP_RV670:
		chip_name = "RV670";
		break;
	case CHIP_RS780:
	case CHIP_RS880:
		chip_name = "RS780";
		break;
	default:
		/* Unknown family: leave the CP microcode alone. */
		return;
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	if ((error = radeon_load_a_microcode("%s_pfp.bin", chip_name, (void **)&pfp, &pfp_size)) != 0)
		return;
	if ((error = radeon_load_a_microcode("%s_me.bin", chip_name, (void **)&me, &me_size)) != 0) {
		radeon_free_a_microcode(pfp, pfp_size);
		return;
	}

	r600_do_cp_stop(dev_priv);

	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_RB_NO_UPDATE |
		     R600_RB_BLKSZ(15) |
		     R600_RB_BUFSZ(3));

	/* Soft-reset the CP before reloading its microcode; the read-back
	 * flushes the write before the delay. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);

	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);

	/* Firmware words are stored big-endian; upload the ME image as
	 * PM4_UCODE_SIZE records of three dwords each. */
	for (i = 0; i < PM4_UCODE_SIZE; i++) {
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32toh(me[i][0]));
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32toh(me[i][1]));
		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32toh(me[i][2]));
	}

	/* Upload the prefetch processor (PFP) image. */
	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < PFP_UCODE_SIZE; i++)
		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32toh(pfp[i]));

	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);

	/* Images now live in CP RAM; release the loaded files. */
	radeon_free_a_microcode(pfp, pfp_size);
	radeon_free_a_microcode(me, me_size);
}
    365 
/*
 * Program the r7xx/evergreen VM/MC to translate the GART aperture
 * through the flat page table built by r600_page_table_init().
 *
 * Same overall sequence as r600_vm_init(), but r7xx exposes per-block
 * MD/MB L1 TLB control registers and a different set of context 0
 * page-table address registers.
 */
static void r700_vm_init(struct drm_device *dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	/* initialise the VM to use the page table we constructed up there */
	u32 vm_c0, i;
	u32 mc_vm_md_l1;
	u32 vm_l2_cntl, vm_l2_cntl3;
	/* okay set up the PCIE aperture type thingo */
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);
	RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);

	/* One L1 TLB configuration shared by all MD/MB TLB instances. */
	mc_vm_md_l1 = R700_ENABLE_L1_TLB |
	    R700_ENABLE_L1_FRAGMENT_PROCESSING |
	    R700_SYSTEM_ACCESS_MODE_IN_SYS |
	    R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
	    R700_EFFECTIVE_L1_TLB_SIZE(5) |
	    R700_EFFECTIVE_L1_QUEUE_SIZE(5);

	RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1);
	RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1);

	/* VM L2 cache: enabled, fragment processing, LRU write allocation. */
	vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W;
	vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7);
	RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl);

	RADEON_WRITE(R600_VM_L2_CNTL2, 0);
	vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) |
	              R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2);
	RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3);

	/* Context 0 enabled, flat (single level) page table. */
	vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT;

	RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0);

	vm_c0 &= ~R600_VM_ENABLE_CONTEXT;

	/* disable all other contexts */
	for (i = 1; i < 8; i++)
		RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0);

	/* Point context 0 at the page table; addresses are in 4K pages. */
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12);
	RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12);

	r600_vm_flush_gart_range(dev);
}
    418 
    419 /* load r600 microcode */
    420 static void r700_cp_load_microcode(drm_radeon_private_t * dev_priv)
    421 {
    422 	size_t pfp_req_size, me_req_size;
    423 	const char *chip_name;
    424 	u32 *pfp;
    425 	u32 *me;
    426 	size_t pfp_size, me_size;
    427 	int i, error;
    428 
    429 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
    430 	case CHIP_RV770:
    431 		chip_name = "RV770";
    432 		break;
    433 	case CHIP_RV730:
    434 	case CHIP_RV740:
    435 		chip_name = "RV730";
    436 		break;
    437 	case CHIP_RV710:
    438 		chip_name = "RV710";
    439 		break;
    440 	case CHIP_CEDAR:
    441 		chip_name = "CEDAR";
    442 		break;
    443 	case CHIP_REDWOOD:
    444 		chip_name = "REDWOOD";
    445 		break;
    446 	case CHIP_JUNIPER:
    447 		chip_name = "JUNIPER";
    448 		break;
    449 	case CHIP_CYPRESS:
    450 	case CHIP_HEMLOCK:
    451 		chip_name = "CYPRESS";
    452 		break;
    453 	default:
    454 		return;
    455 	}
    456 
    457 	DRM_INFO("Loading %s Microcode\n", chip_name);
    458 
    459 	if ((error = radeon_load_a_microcode("%s_pfp.bin", chip_name, (void **)&pfp, &pfp_size)) != 0)
    460 		return;
    461 	if ((error = radeon_load_a_microcode("%s_me.bin", chip_name, (void **)&me, &me_size)) != 0) {
    462 		radeon_free_a_microcode(pfp, pfp_size);
    463 		return;
    464 	}
    465 
    466 	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_CEDAR)) {
    467 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
    468 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
    469 	} else {
    470 		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
    471 		me_req_size = R700_PM4_UCODE_SIZE * 4;
    472 	}
    473 
    474 	if (pfp_req_size != pfp_size) {
    475 		DRM_ERROR("Wrong size for %s_pfp.bin (got %zu want %zu)\n", chip_name, pfp_size, pfp_req_size);
    476 		radeon_free_a_microcode(pfp, pfp_size);
    477 		radeon_free_a_microcode(me, me_size);
    478 		return;
    479 	}
    480 
    481 	if (me_req_size != me_size) {
    482 		DRM_ERROR("Wrong size for %s_me.bin (got %zu want %zu)\n", chip_name, me_size, me_req_size);
    483 		radeon_free_a_microcode(pfp, pfp_size);
    484 		radeon_free_a_microcode(me, me_size);
    485 		return;
    486 	}
    487 
    488 	r600_do_cp_stop(dev_priv);
    489 
    490 	RADEON_WRITE(R600_CP_RB_CNTL,
    491 		     R600_RB_NO_UPDATE |
    492 		     R600_RB_BLKSZ(15) |
    493 		     R600_RB_BUFSZ(3));
    494 
    495 	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
    496 	RADEON_READ(R600_GRBM_SOFT_RESET);
    497 	DRM_UDELAY(15000);
    498 	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
    499 
    500 	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
    501 	for (i = 0; i < pfp_req_size / 4; i++)
    502 		RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32toh(pfp[i]));
    503 	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
    504 
    505 	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
    506 	for (i = 0; i < me_req_size / 4; i++)
    507 		RADEON_WRITE(R600_CP_ME_RAM_DATA, be32toh(me[i]));
    508 	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
    509 
    510 	RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
    511 	RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
    512 	RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0);
    513 
    514 	radeon_free_a_microcode(pfp, pfp_size);
    515 	radeon_free_a_microcode(me, me_size);
    516 }
    517 
    518 static void r600_test_writeback(drm_radeon_private_t * dev_priv)
    519 {
    520 	u32 tmp;
    521 
    522 	/* Writeback doesn't seem to work everywhere, test it here and possibly
    523 	 * enable it if it appears to work
    524 	 */
    525 	DRM_WRITE32(dev_priv->ring_rptr, R600_SCRATCHOFF(1), 0);
    526 	RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef);
    527 
    528 	for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) {
    529 		if (DRM_READ32(dev_priv->ring_rptr, R600_SCRATCHOFF(1)) ==
    530 		    0xdeadbeef)
    531 			break;
    532 		DRM_UDELAY(1);
    533 	}
    534 
    535 	if (tmp < dev_priv->usec_timeout) {
    536 		dev_priv->writeback_works = 1;
    537 		DRM_INFO("writeback test succeeded in %d usecs\n", tmp);
    538 	} else {
    539 		dev_priv->writeback_works = 0;
    540 
    541 		for (tmp = 0; tmp < 512; tmp+=16 )
    542 			DRM_DEBUG("%d %x %x %x %x\n",  tmp, DRM_READ32(dev_priv->ring_rptr, tmp),
    543 				  DRM_READ32(dev_priv->ring_rptr, tmp + 4),
    544 				  DRM_READ32(dev_priv->ring_rptr, tmp + 8),
    545 				  DRM_READ32(dev_priv->ring_rptr, tmp + 16));
    546 
    547 		DRM_INFO("writeback test failed %x %x\n", DRM_READ32(dev_priv->ring_rptr, R600_SCRATCHOFF(1)), RADEON_READ(R600_SCRATCH_REG1));
    548 	}
    549 	if (radeon_no_wb == 1) {
    550 		dev_priv->writeback_works = 0;
    551 		DRM_INFO("writeback forced off\n");
    552 	}
    553 
    554 	if (!dev_priv->writeback_works) {
    555 		/* Disable writeback to avoid unnecessary bus master transfers */
    556 		RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) | RADEON_RB_NO_UPDATE);
    557 		RADEON_WRITE(R600_SCRATCH_UMSK, 0);
    558 	}
    559 }
    560 
/*
 * Hard-reset the graphics engine via GRBM soft reset.
 *
 * Saves the current ring write pointer and ME control state, halts the
 * micro engine, pulses every GRBM soft-reset line, then restores the
 * ring pointers and ME state, resets the CP, and flushes the buffer
 * freelist.  Always returns 0.
 */
int r600_do_engine_reset(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	u32 cp_ptr, cp_me_cntl, cp_rb_cntl;

	DRM_INFO("Resetting GPU\n");

	cp_ptr = RADEON_READ(R600_CP_RB_WPTR);
	cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL);
	/* Halt the micro engine while the reset is in flight. */
	RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT);

	/* Assert all GRBM soft-reset lines; the read-back flushes the
	 * write before the delay, then deassert. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(50);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);
	RADEON_READ(R600_GRBM_SOFT_RESET);

	/* Restore ring state: temporarily enable RPTR writes so the read
	 * pointer can be forced to the saved write pointer. */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
	cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
	RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA);

	RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
	RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl);
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl);

	/* Reset the CP ring */
	r600_do_cp_reset(dev_priv);

	/* The CP is no longer running after an engine reset */
	dev_priv->cp_running = 0;

	/* Reset any pending vertex, indirect buffers */
	radeon_freelist_reset(dev);

	return 0;

}
    599 
    600 static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
    601 					     u32 num_backends,
    602 					     u32 backend_disable_mask)
    603 {
    604 	u32 backend_map = 0;
    605 	u32 enabled_backends_mask;
    606 	u32 enabled_backends_count;
    607 	u32 cur_pipe;
    608 	u32 swizzle_pipe[R6XX_MAX_PIPES];
    609 	u32 cur_backend;
    610 	u32 i;
    611 
    612 	if (num_tile_pipes > R6XX_MAX_PIPES)
    613 		num_tile_pipes = R6XX_MAX_PIPES;
    614 	if (num_tile_pipes < 1)
    615 		num_tile_pipes = 1;
    616 	if (num_backends > R6XX_MAX_BACKENDS)
    617 		num_backends = R6XX_MAX_BACKENDS;
    618 	if (num_backends < 1)
    619 		num_backends = 1;
    620 
    621 	enabled_backends_mask = 0;
    622 	enabled_backends_count = 0;
    623 	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) {
    624 		if (((backend_disable_mask >> i) & 1) == 0) {
    625 			enabled_backends_mask |= (1 << i);
    626 			++enabled_backends_count;
    627 		}
    628 		if (enabled_backends_count == num_backends)
    629 			break;
    630 	}
    631 
    632 	if (enabled_backends_count == 0) {
    633 		enabled_backends_mask = 1;
    634 		enabled_backends_count = 1;
    635 	}
    636 
    637 	if (enabled_backends_count != num_backends)
    638 		num_backends = enabled_backends_count;
    639 
    640 	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES);
    641 	switch (num_tile_pipes) {
    642 	case 1:
    643 		swizzle_pipe[0] = 0;
    644 		break;
    645 	case 2:
    646 		swizzle_pipe[0] = 0;
    647 		swizzle_pipe[1] = 1;
    648 		break;
    649 	case 3:
    650 		swizzle_pipe[0] = 0;
    651 		swizzle_pipe[1] = 1;
    652 		swizzle_pipe[2] = 2;
    653 		break;
    654 	case 4:
    655 		swizzle_pipe[0] = 0;
    656 		swizzle_pipe[1] = 1;
    657 		swizzle_pipe[2] = 2;
    658 		swizzle_pipe[3] = 3;
    659 		break;
    660 	case 5:
    661 		swizzle_pipe[0] = 0;
    662 		swizzle_pipe[1] = 1;
    663 		swizzle_pipe[2] = 2;
    664 		swizzle_pipe[3] = 3;
    665 		swizzle_pipe[4] = 4;
    666 		break;
    667 	case 6:
    668 		swizzle_pipe[0] = 0;
    669 		swizzle_pipe[1] = 2;
    670 		swizzle_pipe[2] = 4;
    671 		swizzle_pipe[3] = 5;
    672 		swizzle_pipe[4] = 1;
    673 		swizzle_pipe[5] = 3;
    674 		break;
    675 	case 7:
    676 		swizzle_pipe[0] = 0;
    677 		swizzle_pipe[1] = 2;
    678 		swizzle_pipe[2] = 4;
    679 		swizzle_pipe[3] = 6;
    680 		swizzle_pipe[4] = 1;
    681 		swizzle_pipe[5] = 3;
    682 		swizzle_pipe[6] = 5;
    683 		break;
    684 	case 8:
    685 		swizzle_pipe[0] = 0;
    686 		swizzle_pipe[1] = 2;
    687 		swizzle_pipe[2] = 4;
    688 		swizzle_pipe[3] = 6;
    689 		swizzle_pipe[4] = 1;
    690 		swizzle_pipe[5] = 3;
    691 		swizzle_pipe[6] = 5;
    692 		swizzle_pipe[7] = 7;
    693 		break;
    694 	}
    695 
    696 	cur_backend = 0;
    697 	for (cur_pipe=0; cur_pipe<num_tile_pipes; ++cur_pipe) {
    698 		while (((1 << cur_backend) & enabled_backends_mask) == 0)
    699 			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
    700 
    701 		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
    702 
    703 		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
    704 	}
    705 
    706 	return backend_map;
    707 }
    708 
/* Count the number of set bits in a 32-bit pipe/backend mask. */
static int r600_count_pipe_bits (uint32_t val)
{
	int bits = 0;

	/* Kernighan's method: each iteration clears the lowest set bit. */
	while (val != 0) {
		val &= val - 1;
		bits++;
	}
	return bits;
}
    718 
    719 static void r600_gfx_init(struct drm_device * dev,
    720 			  drm_radeon_private_t * dev_priv)
    721 {
    722 	int i, j, num_qd_pipes;
    723 	u32 sx_debug_1;
    724 	u32 tc_cntl;
    725 	u32 arb_pop;
    726 	u32 num_gs_verts_per_thread;
    727         u32 vgt_gs_per_es;
    728 	u32 gs_prim_buffer_depth = 0;
    729 	u32 sq_ms_fifo_sizes;
    730 	u32 sq_config;
    731 	u32 sq_gpr_resource_mgmt_1 = 0;
    732 	u32 sq_gpr_resource_mgmt_2 = 0;
    733 	u32 sq_thread_resource_mgmt = 0;
    734 	u32 sq_stack_resource_mgmt_1 = 0;
    735 	u32 sq_stack_resource_mgmt_2 = 0;
    736 	u32 hdp_host_path_cntl;
    737 	u32 backend_map;
    738 	u32 gb_tiling_config = 0;
    739 	u32 cc_rb_backend_disable = 0;
    740 	u32 cc_gc_shader_pipe_config = 0;
    741         u32 ramcfg;
    742 
    743 	/* setup chip specs */
    744         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
    745         case CHIP_R600:
    746 		dev_priv->r600_max_pipes = 4;
    747 		dev_priv->r600_max_tile_pipes = 8;
    748 		dev_priv->r600_max_simds = 4;
    749 		dev_priv->r600_max_backends = 4;
    750 		dev_priv->r600_max_gprs = 256;
    751 		dev_priv->r600_max_threads = 192;
    752 		dev_priv->r600_max_stack_entries = 256;
    753 		dev_priv->r600_max_hw_contexts = 8;
    754 		dev_priv->r600_max_gs_threads = 16;
    755 		dev_priv->r600_sx_max_export_size = 128;
    756 		dev_priv->r600_sx_max_export_pos_size = 16;
    757 		dev_priv->r600_sx_max_export_smx_size = 128;
    758 		dev_priv->r600_sq_num_cf_insts = 2;
    759 		break;
    760         case CHIP_RV630:
    761 	case CHIP_RV635:
    762 		dev_priv->r600_max_pipes = 2;
    763 		dev_priv->r600_max_tile_pipes = 2;
    764 		dev_priv->r600_max_simds = 3;
    765 		dev_priv->r600_max_backends = 1;
    766 		dev_priv->r600_max_gprs = 128;
    767 		dev_priv->r600_max_threads = 192;
    768 		dev_priv->r600_max_stack_entries = 128;
    769 		dev_priv->r600_max_hw_contexts = 8;
    770 		dev_priv->r600_max_gs_threads = 4;
    771 		dev_priv->r600_sx_max_export_size = 128;
    772 		dev_priv->r600_sx_max_export_pos_size = 16;
    773 		dev_priv->r600_sx_max_export_smx_size = 128;
    774 		dev_priv->r600_sq_num_cf_insts = 2;
    775 		break;
    776         case CHIP_RV610:
    777         case CHIP_RS780:
    778         case CHIP_RS880:
    779         case CHIP_RV620:
    780 		dev_priv->r600_max_pipes = 1;
    781 		dev_priv->r600_max_tile_pipes = 1;
    782 		dev_priv->r600_max_simds = 2;
    783 		dev_priv->r600_max_backends = 1;
    784 		dev_priv->r600_max_gprs = 128;
    785 		dev_priv->r600_max_threads = 192;
    786 		dev_priv->r600_max_stack_entries = 128;
    787 		dev_priv->r600_max_hw_contexts = 4;
    788 		dev_priv->r600_max_gs_threads = 4;
    789 		dev_priv->r600_sx_max_export_size = 128;
    790 		dev_priv->r600_sx_max_export_pos_size = 16;
    791 		dev_priv->r600_sx_max_export_smx_size = 128;
    792 		dev_priv->r600_sq_num_cf_insts = 1;
    793 		break;
    794         case CHIP_RV670:
    795 		dev_priv->r600_max_pipes = 4;
    796 		dev_priv->r600_max_tile_pipes = 4;
    797 		dev_priv->r600_max_simds = 4;
    798 		dev_priv->r600_max_backends = 4;
    799 		dev_priv->r600_max_gprs = 192;
    800 		dev_priv->r600_max_threads = 192;
    801 		dev_priv->r600_max_stack_entries = 256;
    802 		dev_priv->r600_max_hw_contexts = 8;
    803 		dev_priv->r600_max_gs_threads = 16;
    804 		dev_priv->r600_sx_max_export_size = 128;
    805 		dev_priv->r600_sx_max_export_pos_size = 16;
    806 		dev_priv->r600_sx_max_export_smx_size = 128;
    807 		dev_priv->r600_sq_num_cf_insts = 2;
    808 		break;
    809         default:
    810 		break;
    811         }
    812 
    813 	/* Initialize HDP */
    814 	j = 0;
    815 	for (i = 0; i < 32; i++) {
    816 		RADEON_WRITE((0x2c14 + j), 0x00000000);
    817 		RADEON_WRITE((0x2c18 + j), 0x00000000);
    818 		RADEON_WRITE((0x2c1c + j), 0x00000000);
    819 		RADEON_WRITE((0x2c20 + j), 0x00000000);
    820 		RADEON_WRITE((0x2c24 + j), 0x00000000);
    821 		j += 0x18;
    822 	}
    823 
    824 	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
    825 
    826 	/* setup tiling, simd, pipe config */
    827 	ramcfg = RADEON_READ(R600_RAMCFG);
    828 
    829 	switch (dev_priv->r600_max_tile_pipes) {
    830 	case 1:
    831 		gb_tiling_config |= R600_PIPE_TILING(0);
    832                 break;
    833 	case 2:
    834 		gb_tiling_config |= R600_PIPE_TILING(1);
    835                 break;
    836 	case 4:
    837 		gb_tiling_config |= R600_PIPE_TILING(2);
    838                 break;
    839 	case 8:
    840 		gb_tiling_config |= R600_PIPE_TILING(3);
    841                 break;
    842 	default:
    843 		break;
    844 	}
    845 
    846 	gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK);
    847 
    848 	gb_tiling_config |= R600_GROUP_SIZE(0);
    849 
    850 	if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) {
    851 		gb_tiling_config |= R600_ROW_TILING(3);
    852 		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
    853 	} else {
    854 		gb_tiling_config |=
    855 			R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
    856 		gb_tiling_config |=
    857 			R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK));
    858 	}
    859 
    860 	gb_tiling_config |= R600_BANK_SWAPS(1);
    861 
    862 	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
    863 							dev_priv->r600_max_backends,
    864 							(0xff << dev_priv->r600_max_backends) & 0xff);
    865 	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
    866 
    867 	cc_gc_shader_pipe_config =
    868 		R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
    869 	cc_gc_shader_pipe_config |=
    870 		R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
    871 
    872 	cc_rb_backend_disable =
    873 		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
    874 
    875 	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
    876 	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
    877 	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
    878 
    879 	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
    880 	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
    881 	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
    882 
    883 	num_qd_pipes =
    884 		R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
    885 	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
    886 	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
    887 
    888 	/* set HW defaults for 3D engine */
    889 	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
    890 						R600_ROQ_IB2_START(0x2b)));
    891 
    892 	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) |
    893 					      R600_ROQ_END(0x40)));
    894 
    895 	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
    896 					R600_SYNC_GRADIENT |
    897 					R600_SYNC_WALKER |
    898 					R600_SYNC_ALIGNER));
    899 
    900 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)
    901 		RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021);
    902 
    903 	sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1);
    904 	sx_debug_1 |= R600_SMX_EVENT_RELEASE;
    905 	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600))
    906 		sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS;
    907 	RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1);
    908 
    909 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
    910 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
    911 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
    912 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
    913 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
    914 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
    915 		RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE);
    916 	else
    917 		RADEON_WRITE(R600_DB_DEBUG, 0);
    918 
    919         RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) |
    920 					  R600_DEPTH_FLUSH(16) |
    921 					  R600_DEPTH_PENDING_FREE(4) |
    922 					  R600_DEPTH_CACHELINE_FREE(16)));
    923         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
    924         RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0);
    925 
    926 	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
    927 	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0));
    928 
    929 	sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES);
    930 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
    931 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
    932 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
    933 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
    934 		sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) |
    935 				    R600_FETCH_FIFO_HIWATER(0xa) |
    936 				    R600_DONE_FIFO_HIWATER(0xe0) |
    937 				    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
    938 	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) ||
    939 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) {
    940 		sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff);
    941 		sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4);
    942 	}
    943 	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
    944 
    945 	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
    946 	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
    947 	 */
    948 	sq_config = RADEON_READ(R600_SQ_CONFIG);
    949 	sq_config &= ~(R600_PS_PRIO(3) |
    950 		       R600_VS_PRIO(3) |
    951 		       R600_GS_PRIO(3) |
    952 		       R600_ES_PRIO(3));
    953 	sq_config |= (R600_DX9_CONSTS |
    954 		      R600_VC_ENABLE |
    955 		      R600_PS_PRIO(0) |
    956 		      R600_VS_PRIO(1) |
    957 		      R600_GS_PRIO(2) |
    958 		      R600_ES_PRIO(3));
    959 
    960 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) {
    961 		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) |
    962 					  R600_NUM_VS_GPRS(124) |
    963 					  R600_NUM_CLAUSE_TEMP_GPRS(4));
    964 		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) |
    965 					  R600_NUM_ES_GPRS(0));
    966 		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) |
    967 					   R600_NUM_VS_THREADS(48) |
    968 					   R600_NUM_GS_THREADS(4) |
    969 					   R600_NUM_ES_THREADS(4));
    970 		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) |
    971 					    R600_NUM_VS_STACK_ENTRIES(128));
    972 		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) |
    973 					    R600_NUM_ES_STACK_ENTRIES(0));
    974 	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
    975 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
    976 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
    977 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) {
    978 		/* no vertex cache */
    979 		sq_config &= ~R600_VC_ENABLE;
    980 
    981 		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
    982 					  R600_NUM_VS_GPRS(44) |
    983 					  R600_NUM_CLAUSE_TEMP_GPRS(2));
    984 		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
    985 					  R600_NUM_ES_GPRS(17));
    986 		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
    987 					   R600_NUM_VS_THREADS(78) |
    988 					   R600_NUM_GS_THREADS(4) |
    989 					   R600_NUM_ES_THREADS(31));
    990 		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
    991 					    R600_NUM_VS_STACK_ENTRIES(40));
    992 		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
    993 					    R600_NUM_ES_STACK_ENTRIES(16));
    994 	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) ||
    995 		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)){
    996 		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
    997 					  R600_NUM_VS_GPRS(44) |
    998 					  R600_NUM_CLAUSE_TEMP_GPRS(2));
    999 		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) |
   1000 					  R600_NUM_ES_GPRS(18));
   1001 		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
   1002 					   R600_NUM_VS_THREADS(78) |
   1003 					   R600_NUM_GS_THREADS(4) |
   1004 					   R600_NUM_ES_THREADS(31));
   1005 		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) |
   1006 					    R600_NUM_VS_STACK_ENTRIES(40));
   1007 		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) |
   1008 					    R600_NUM_ES_STACK_ENTRIES(16));
   1009 	} else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) {
   1010 		sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) |
   1011 					  R600_NUM_VS_GPRS(44) |
   1012 					  R600_NUM_CLAUSE_TEMP_GPRS(2));
   1013 		sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) |
   1014 					  R600_NUM_ES_GPRS(17));
   1015 		sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) |
   1016 					   R600_NUM_VS_THREADS(78) |
   1017 					   R600_NUM_GS_THREADS(4) |
   1018 					   R600_NUM_ES_THREADS(31));
   1019 		sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) |
   1020 					    R600_NUM_VS_STACK_ENTRIES(64));
   1021 		sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) |
   1022 					    R600_NUM_ES_STACK_ENTRIES(64));
   1023 	}
   1024 
   1025         RADEON_WRITE(R600_SQ_CONFIG, sq_config);
   1026         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
   1027         RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
   1028         RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
   1029         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
   1030         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
   1031 
   1032 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
   1033 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
   1034 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
   1035 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880))
   1036 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY));
   1037 	else
   1038 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC));
   1039 
   1040 	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) |
   1041 						    R600_S0_Y(0x4) |
   1042 						    R600_S1_X(0x4) |
   1043 						    R600_S1_Y(0xc)));
   1044 	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) |
   1045 						    R600_S0_Y(0xe) |
   1046 						    R600_S1_X(0x2) |
   1047 						    R600_S1_Y(0x2) |
   1048 						    R600_S2_X(0xa) |
   1049 						    R600_S2_Y(0x6) |
   1050 						    R600_S3_X(0x6) |
   1051 						    R600_S3_Y(0xa)));
   1052 	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) |
   1053 							R600_S0_Y(0xb) |
   1054 							R600_S1_X(0x4) |
   1055 							R600_S1_Y(0xc) |
   1056 							R600_S2_X(0x1) |
   1057 							R600_S2_Y(0x6) |
   1058 							R600_S3_X(0xa) |
   1059 							R600_S3_Y(0xe)));
   1060 	RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) |
   1061 							R600_S4_Y(0x1) |
   1062 							R600_S5_X(0x0) |
   1063 							R600_S5_Y(0x0) |
   1064 							R600_S6_X(0xb) |
   1065 							R600_S6_Y(0x4) |
   1066 							R600_S7_X(0x7) |
   1067 							R600_S7_Y(0x8)));
   1068 
   1069 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
   1070         case CHIP_R600:
   1071         case CHIP_RV630:
   1072 	case CHIP_RV635:
   1073 		gs_prim_buffer_depth = 0;
   1074 		break;
   1075         case CHIP_RV610:
   1076         case CHIP_RS780:
   1077 	case CHIP_RS880:
   1078         case CHIP_RV620:
   1079 		gs_prim_buffer_depth = 32;
   1080 		break;
   1081         case CHIP_RV670:
   1082 		gs_prim_buffer_depth = 128;
   1083 		break;
   1084         default:
   1085 		break;
   1086         }
   1087 
   1088         num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
   1089         vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
   1090         /* Max value for this is 256 */
   1091         if (vgt_gs_per_es > 256)
   1092 		vgt_gs_per_es = 256;
   1093 
   1094         RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
   1095         RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
   1096         RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
   1097         RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
   1098 
   1099 	/* more default values. 2D/3D driver should adjust as needed */
   1100         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
   1101         RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
   1102         RADEON_WRITE(R600_SX_MISC, 0);
   1103         RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
   1104         RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
   1105         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
   1106         RADEON_WRITE(R600_SPI_INPUT_Z, 0);
   1107         RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
   1108         RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
   1109 
   1110 	/* clear render buffer base addresses */
   1111 	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
   1112 	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
   1113 	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
   1114 	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
   1115 	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
   1116 	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
   1117 	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
   1118 	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
   1119 
   1120         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
   1121         case CHIP_RV610:
   1122         case CHIP_RS780:
   1123 	case CHIP_RS880:
   1124         case CHIP_RV620:
   1125 		tc_cntl = R600_TC_L2_SIZE(8);
   1126 		break;
   1127         case CHIP_RV630:
   1128         case CHIP_RV635:
   1129 		tc_cntl = R600_TC_L2_SIZE(4);
   1130 		break;
   1131         case CHIP_R600:
   1132 		tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT;
   1133 		break;
   1134         default:
   1135 		tc_cntl = R600_TC_L2_SIZE(0);
   1136 		break;
   1137         }
   1138 
   1139 	RADEON_WRITE(R600_TC_CNTL, tc_cntl);
   1140 
   1141 	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
   1142 	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
   1143 
   1144 	arb_pop = RADEON_READ(R600_ARB_POP);
   1145 	arb_pop |= R600_ENABLE_TC128;
   1146 	RADEON_WRITE(R600_ARB_POP, arb_pop);
   1147 
   1148 	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
   1149 	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
   1150 					  R600_NUM_CLIP_SEQ(3)));
   1151 	RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095));
   1152 
   1153 }
   1154 
   1155 static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
   1156 					     u32 num_backends,
   1157 					     u32 backend_disable_mask)
   1158 {
   1159 	u32 backend_map = 0;
   1160 	u32 enabled_backends_mask;
   1161 	u32 enabled_backends_count;
   1162 	u32 cur_pipe;
   1163 	u32 swizzle_pipe[R7XX_MAX_PIPES];
   1164 	u32 cur_backend;
   1165 	u32 i;
   1166 
   1167 	if (num_tile_pipes > R7XX_MAX_PIPES)
   1168 		num_tile_pipes = R7XX_MAX_PIPES;
   1169 	if (num_tile_pipes < 1)
   1170 		num_tile_pipes = 1;
   1171 	if (num_backends > R7XX_MAX_BACKENDS)
   1172 		num_backends = R7XX_MAX_BACKENDS;
   1173 	if (num_backends < 1)
   1174 		num_backends = 1;
   1175 
   1176 	enabled_backends_mask = 0;
   1177 	enabled_backends_count = 0;
   1178 	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {
   1179 		if (((backend_disable_mask >> i) & 1) == 0) {
   1180 			enabled_backends_mask |= (1 << i);
   1181 			++enabled_backends_count;
   1182 		}
   1183 		if (enabled_backends_count == num_backends)
   1184 			break;
   1185 	}
   1186 
   1187 	if (enabled_backends_count == 0) {
   1188 		enabled_backends_mask = 1;
   1189 		enabled_backends_count = 1;
   1190 	}
   1191 
   1192 	if (enabled_backends_count != num_backends)
   1193 		num_backends = enabled_backends_count;
   1194 
   1195 	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
   1196 	switch (num_tile_pipes) {
   1197 	case 1:
   1198 		swizzle_pipe[0] = 0;
   1199 		break;
   1200 	case 2:
   1201 		swizzle_pipe[0] = 0;
   1202 		swizzle_pipe[1] = 1;
   1203 		break;
   1204 	case 3:
   1205 		swizzle_pipe[0] = 0;
   1206 		swizzle_pipe[1] = 2;
   1207 		swizzle_pipe[2] = 1;
   1208 		break;
   1209 	case 4:
   1210 		swizzle_pipe[0] = 0;
   1211 		swizzle_pipe[1] = 2;
   1212 		swizzle_pipe[2] = 3;
   1213 		swizzle_pipe[3] = 1;
   1214 		break;
   1215 	case 5:
   1216 		swizzle_pipe[0] = 0;
   1217 		swizzle_pipe[1] = 2;
   1218 		swizzle_pipe[2] = 4;
   1219 		swizzle_pipe[3] = 1;
   1220 		swizzle_pipe[4] = 3;
   1221 		break;
   1222 	case 6:
   1223 		swizzle_pipe[0] = 0;
   1224 		swizzle_pipe[1] = 2;
   1225 		swizzle_pipe[2] = 4;
   1226 		swizzle_pipe[3] = 5;
   1227 		swizzle_pipe[4] = 3;
   1228 		swizzle_pipe[5] = 1;
   1229 		break;
   1230 	case 7:
   1231 		swizzle_pipe[0] = 0;
   1232 		swizzle_pipe[1] = 2;
   1233 		swizzle_pipe[2] = 4;
   1234 		swizzle_pipe[3] = 6;
   1235 		swizzle_pipe[4] = 3;
   1236 		swizzle_pipe[5] = 1;
   1237 		swizzle_pipe[6] = 5;
   1238 		break;
   1239 	case 8:
   1240 		swizzle_pipe[0] = 0;
   1241 		swizzle_pipe[1] = 2;
   1242 		swizzle_pipe[2] = 4;
   1243 		swizzle_pipe[3] = 6;
   1244 		swizzle_pipe[4] = 3;
   1245 		swizzle_pipe[5] = 1;
   1246 		swizzle_pipe[6] = 7;
   1247 		swizzle_pipe[7] = 5;
   1248 		break;
   1249 	}
   1250 
   1251 	cur_backend = 0;
   1252 	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
   1253 		while (((1 << cur_backend) & enabled_backends_mask) == 0)
   1254 			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
   1255 
   1256 		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));
   1257 
   1258 		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
   1259 	}
   1260 
   1261 	return backend_map;
   1262 }
   1263 
   1264 static void r700_gfx_init(struct drm_device * dev,
   1265 			  drm_radeon_private_t * dev_priv)
   1266 {
   1267 	int i, j, num_qd_pipes;
   1268 	u32 sx_debug_1;
   1269 	u32 smx_dc_ctl0;
   1270 	u32 num_gs_verts_per_thread;
   1271         u32 vgt_gs_per_es;
   1272 	u32 gs_prim_buffer_depth = 0;
   1273 	u32 sq_ms_fifo_sizes;
   1274 	u32 sq_config;
   1275 	u32 sq_thread_resource_mgmt;
   1276 	u32 hdp_host_path_cntl;
   1277 	u32 sq_dyn_gpr_size_simd_ab_0;
   1278 	u32 backend_map;
   1279 	u32 gb_tiling_config = 0;
   1280 	u32 cc_rb_backend_disable = 0;
   1281 	u32 cc_gc_shader_pipe_config = 0;
   1282         u32 mc_arb_ramcfg;
   1283 	u32 db_debug4;
   1284 
   1285 	/* setup chip specs */
   1286         switch (dev_priv->flags & RADEON_FAMILY_MASK) {
   1287 	case CHIP_RV770:
   1288 		dev_priv->r600_max_pipes = 4;
   1289 		dev_priv->r600_max_tile_pipes = 8;
   1290 		dev_priv->r600_max_simds = 10;
   1291 		dev_priv->r600_max_backends = 4;
   1292 		dev_priv->r600_max_gprs = 256;
   1293 		dev_priv->r600_max_threads = 248;
   1294 		dev_priv->r600_max_stack_entries = 512;
   1295 		dev_priv->r600_max_hw_contexts = 8;
   1296 		dev_priv->r600_max_gs_threads = 16 * 2;
   1297 		dev_priv->r600_sx_max_export_size = 128;
   1298 		dev_priv->r600_sx_max_export_pos_size = 16;
   1299 		dev_priv->r600_sx_max_export_smx_size = 112;
   1300 		dev_priv->r600_sq_num_cf_insts = 2;
   1301 
   1302 		dev_priv->r700_sx_num_of_sets = 7;
   1303 		dev_priv->r700_sc_prim_fifo_size = 0xF9;
   1304 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
   1305 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
   1306 		break;
   1307 	case CHIP_RV740:
   1308 		dev_priv->r600_max_pipes = 4;
   1309 		dev_priv->r600_max_tile_pipes = 4;
   1310 		dev_priv->r600_max_simds = 8;
   1311 		dev_priv->r600_max_backends = 4;
   1312 		dev_priv->r600_max_gprs = 256;
   1313 		dev_priv->r600_max_threads = 248;
   1314 		dev_priv->r600_max_stack_entries = 512;
   1315 		dev_priv->r600_max_hw_contexts = 8;
   1316 		dev_priv->r600_max_gs_threads = 16 * 2;
   1317 		dev_priv->r600_sx_max_export_size = 256;
   1318 		dev_priv->r600_sx_max_export_pos_size = 32;
   1319 		dev_priv->r600_sx_max_export_smx_size = 224;
   1320 		dev_priv->r600_sq_num_cf_insts = 2;
   1321 
   1322 		dev_priv->r700_sx_num_of_sets = 7;
   1323 		dev_priv->r700_sc_prim_fifo_size = 0x100;
   1324 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
   1325 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
   1326 
   1327 		if (dev_priv->r600_sx_max_export_pos_size > 16) {
   1328 			dev_priv->r600_sx_max_export_pos_size -= 16;
   1329 			dev_priv->r600_sx_max_export_smx_size += 16;
   1330 		}
   1331 		break;
   1332 	case CHIP_RV730:
   1333 		dev_priv->r600_max_pipes = 2;
   1334 		dev_priv->r600_max_tile_pipes = 4;
   1335 		dev_priv->r600_max_simds = 8;
   1336 		dev_priv->r600_max_backends = 2;
   1337 		dev_priv->r600_max_gprs = 128;
   1338 		dev_priv->r600_max_threads = 248;
   1339 		dev_priv->r600_max_stack_entries = 256;
   1340 		dev_priv->r600_max_hw_contexts = 8;
   1341 		dev_priv->r600_max_gs_threads = 16 * 2;
   1342 		dev_priv->r600_sx_max_export_size = 256;
   1343 		dev_priv->r600_sx_max_export_pos_size = 32;
   1344 		dev_priv->r600_sx_max_export_smx_size = 224;
   1345 		dev_priv->r600_sq_num_cf_insts = 2;
   1346 
   1347 		dev_priv->r700_sx_num_of_sets = 7;
   1348 		dev_priv->r700_sc_prim_fifo_size = 0xf9;
   1349 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
   1350 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
   1351 
   1352 		if (dev_priv->r600_sx_max_export_pos_size > 16) {
   1353 			dev_priv->r600_sx_max_export_pos_size -= 16;
   1354 			dev_priv->r600_sx_max_export_smx_size += 16;
   1355 		}
   1356 		break;
   1357 	case CHIP_RV710:
   1358 		dev_priv->r600_max_pipes = 2;
   1359 		dev_priv->r600_max_tile_pipes = 2;
   1360 		dev_priv->r600_max_simds = 2;
   1361 		dev_priv->r600_max_backends = 1;
   1362 		dev_priv->r600_max_gprs = 256;
   1363 		dev_priv->r600_max_threads = 192;
   1364 		dev_priv->r600_max_stack_entries = 256;
   1365 		dev_priv->r600_max_hw_contexts = 4;
   1366 		dev_priv->r600_max_gs_threads = 8 * 2;
   1367 		dev_priv->r600_sx_max_export_size = 128;
   1368 		dev_priv->r600_sx_max_export_pos_size = 16;
   1369 		dev_priv->r600_sx_max_export_smx_size = 112;
   1370 		dev_priv->r600_sq_num_cf_insts = 1;
   1371 
   1372 		dev_priv->r700_sx_num_of_sets = 7;
   1373 		dev_priv->r700_sc_prim_fifo_size = 0x40;
   1374 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
   1375 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
   1376 		break;
   1377         default:
   1378 		break;
   1379         }
   1380 
   1381 	/* Initialize HDP */
   1382 	j = 0;
   1383 	for (i = 0; i < 32; i++) {
   1384 		RADEON_WRITE((0x2c14 + j), 0x00000000);
   1385 		RADEON_WRITE((0x2c18 + j), 0x00000000);
   1386 		RADEON_WRITE((0x2c1c + j), 0x00000000);
   1387 		RADEON_WRITE((0x2c20 + j), 0x00000000);
   1388 		RADEON_WRITE((0x2c24 + j), 0x00000000);
   1389 		j += 0x18;
   1390 	}
   1391 
   1392 	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));
   1393 
   1394 	/* setup tiling, simd, pipe config */
   1395 	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);
   1396 
   1397 	switch (dev_priv->r600_max_tile_pipes) {
   1398 	case 1:
   1399 		gb_tiling_config |= R600_PIPE_TILING(0);
   1400                 break;
   1401 	case 2:
   1402 		gb_tiling_config |= R600_PIPE_TILING(1);
   1403                 break;
   1404 	case 4:
   1405 		gb_tiling_config |= R600_PIPE_TILING(2);
   1406                 break;
   1407 	case 8:
   1408 		gb_tiling_config |= R600_PIPE_TILING(3);
   1409                 break;
   1410 	default:
   1411 		break;
   1412 	}
   1413 
   1414 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
   1415 		gb_tiling_config |= R600_BANK_TILING(1);
   1416 	else
   1417 		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);
   1418 
   1419 	gb_tiling_config |= R600_GROUP_SIZE(0);
   1420 
   1421 	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
   1422 		gb_tiling_config |= R600_ROW_TILING(3);
   1423 		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
   1424 	} else {
   1425 		gb_tiling_config |=
   1426 			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
   1427 		gb_tiling_config |=
   1428 			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
   1429 	}
   1430 
   1431 	gb_tiling_config |= R600_BANK_SWAPS(1);
   1432 
   1433 	backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
   1434 							dev_priv->r600_max_backends,
   1435 							(0xff << dev_priv->r600_max_backends) & 0xff);
   1436 	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
   1437 
   1438 	cc_gc_shader_pipe_config =
   1439 		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
   1440 	cc_gc_shader_pipe_config |=
   1441 		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
   1442 
   1443 	cc_rb_backend_disable =
   1444 		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
   1445 
   1446 	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
   1447 	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
   1448 	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
   1449 
   1450 	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
   1451 	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
   1452 	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
   1453 
   1454 	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
   1455 	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
   1456 	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
   1457 	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
   1458 	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
   1459 
   1460 	num_qd_pipes =
   1461 		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
   1462 	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
   1463 	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
   1464 
   1465 	/* set HW defaults for 3D engine */
   1466 	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
   1467 						R600_ROQ_IB2_START(0x2b)));
   1468 
   1469         RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
   1470 
   1471 	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
   1472 					R600_SYNC_GRADIENT |
   1473 					R600_SYNC_WALKER |
   1474 					R600_SYNC_ALIGNER));
   1475 
   1476 	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
   1477 	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
   1478 	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);
   1479 
   1480 	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
   1481 	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
   1482 	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
   1483 	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
   1484 
   1485 	RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
   1486 					  R700_GS_FLUSH_CTL(4) |
   1487 					  R700_ACK_FLUSH_CTL(3) |
   1488 					  R700_SYNC_FLUSH_CTL));
   1489 
   1490 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
   1491 		RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f));
   1492 	else {
   1493 		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
   1494 		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
   1495 		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
   1496 	}
   1497 
   1498 	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
   1499 						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
   1500 						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));
   1501 
   1502 	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
   1503 						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
   1504 						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));
   1505 
   1506         RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
   1507 
   1508         RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);
   1509 
   1510 	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));
   1511 
   1512 	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));
   1513 
   1514         RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);
   1515 
   1516        	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
   1517 			    R600_DONE_FIFO_HIWATER(0xe0) |
   1518 			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
   1519 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
   1520 	case CHIP_RV770:
   1521 		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
   1522 		break;
   1523 	case CHIP_RV740:
   1524 	case CHIP_RV730:
   1525 	case CHIP_RV710:
   1526 	default:
   1527 		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
   1528 		break;
   1529 	}
   1530 	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
   1531 
   1532 	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
   1533 	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
   1534 	 */
   1535 	sq_config = RADEON_READ(R600_SQ_CONFIG);
   1536 	sq_config &= ~(R600_PS_PRIO(3) |
   1537 		       R600_VS_PRIO(3) |
   1538 		       R600_GS_PRIO(3) |
   1539 		       R600_ES_PRIO(3));
   1540 	sq_config |= (R600_DX9_CONSTS |
   1541 		      R600_VC_ENABLE |
   1542 		      R600_EXPORT_SRC_C |
   1543 		      R600_PS_PRIO(0) |
   1544 		      R600_VS_PRIO(1) |
   1545 		      R600_GS_PRIO(2) |
   1546 		      R600_ES_PRIO(3));
   1547 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
   1548 		/* no vertex cache */
   1549 		sq_config &= ~R600_VC_ENABLE;
   1550 
   1551 	RADEON_WRITE(R600_SQ_CONFIG, sq_config);
   1552 
   1553 	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1,  (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
   1554 						    R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
   1555 						    R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));
   1556 
   1557 	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2,  (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
   1558 						    R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));
   1559 
   1560 	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
   1561 				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
   1562 				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
   1563 	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
   1564 		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
   1565 	else
   1566 		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
   1567 	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
   1568 
   1569         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
   1570 						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
   1571 
   1572         RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
   1573 						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));
   1574 
   1575 	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
   1576 				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
   1577 				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
   1578 				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));
   1579 
   1580         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
   1581         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
   1582         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
   1583         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
   1584         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
   1585         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
   1586         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
   1587         RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
   1588 
   1589 	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
   1590 						     R700_FORCE_EOV_MAX_REZ_CNT(255)));
   1591 
   1592 	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
   1593 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
   1594 							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
   1595 	else
   1596 		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
   1597 							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
   1598 
   1599 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
   1600 	case CHIP_RV770:
   1601 	case CHIP_RV740:
   1602 	case CHIP_RV730:
   1603 		gs_prim_buffer_depth = 384;
   1604 		break;
   1605 	case CHIP_RV710:
   1606 		gs_prim_buffer_depth = 128;
   1607 		break;
   1608         default:
   1609 		break;
   1610         }
   1611 
   1612         num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
   1613         vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
   1614         /* Max value for this is 256 */
   1615         if (vgt_gs_per_es > 256)
   1616 		vgt_gs_per_es = 256;
   1617 
   1618         RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
   1619         RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
   1620         RADEON_WRITE(R600_VGT_GS_PER_VS, 2);
   1621 
   1622 	/* more default values. 2D/3D driver should adjust as needed */
   1623 	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
   1624 	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
   1625 	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
   1626 	RADEON_WRITE(R600_SX_MISC, 0);
   1627         RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
   1628         RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
   1629         RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
   1630         RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
   1631         RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
   1632         RADEON_WRITE(R600_SPI_INPUT_Z, 0);
   1633         RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
   1634         RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);
   1635 
   1636 	/* clear render buffer base addresses */
   1637 	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
   1638 	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
   1639 	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
   1640 	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
   1641 	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
   1642 	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
   1643 	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
   1644 	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);
   1645 
   1646         RADEON_WRITE(R700_TCP_CNTL, 0);
   1647 
   1648 	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
   1649 	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
   1650 
   1651 	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);
   1652 
   1653 	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
   1654 					  R600_NUM_CLIP_SEQ(3)));
   1655 
   1656 }
   1657 
/* Program the CP ring buffer: per-family gfx setup, CP soft reset,
 * ring size/read/write pointers, ring base, and the scratch/writeback
 * area used for frame/dispatch/clear age tracking.
 *
 * Called from r600_do_init_cp() and r600_do_resume_cp() after the CP
 * microcode has been loaded.  The register write order below matters;
 * do not reorder.
 */
static void r600_cp_init_ring_buffer(struct drm_device * dev,
			      drm_radeon_private_t * dev_priv)
{
	u32 ring_start;
	u64 rptr_addr;
	/*u32 cur_read_ptr;*/
	/*u32 tmp;*/

	/* Per-family graphics engine setup must run before touching the CP. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* Pulse a CP soft reset; the read-back forces the write to post.
	 * NOTE(review): 15 ms is a long busy-wait if DRM_UDELAY spins --
	 * confirm against the platform's DRM_UDELAY implementation. */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	DRM_UDELAY(15000);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* Re-write RB_CNTL with RPTR_WR_ENA set so the manual read-pointer
	 * write below (R600_CP_RB_RPTR_WR) takes effect. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     RADEON_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
#if 0
	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
	SET_RING_HEAD(dev_priv, cur_read_ptr);
	dev_priv->ring.tail = cur_read_ptr;

#endif

	/* Both pointers start at zero: ring is empty. */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/* Compute the GPU-visible address of the read-pointer writeback
	 * area: card-space offset differs between AGP and SG paths. */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}

	/* 64-bit writeback address is split across two registers. */
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
		     rptr_addr & 0xffffffff);
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
		     upper_32_bits(rptr_addr));

	/* Final RB_CNTL: clear NO_UPDATE and RPTR_WR_ENA, enabling normal
	 * hardware read-pointer writeback operation. */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		// XXX
		radeon_write_agp_base(dev_priv, dev->agp->base);

		// XXX
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* RB_BASE holds the ring base address in 256-byte units. */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	/* Keep the micro engine halted (same 0xff mux value used by
	 * r600_do_cp_start/r600_do_cp_stop) until the CP is started. */
	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */

	{
		u64 scratch_addr;

		/* Reconstruct the rptr writeback address from the registers
		 * just programmed, offset it, and convert to the 256-byte
		 * granularity R600_SCRATCH_ADDR expects. */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR);
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* CPU-side view of the same scratch area, within the rptr map. */
	dev_priv->scratch = ((__volatile__ u32 *)
			     dev_priv->ring_rptr->handle +
			     (R600_SCRATCH_REG_OFFSET / sizeof(u32)));

	/* Enable writeback for scratch registers 0-2 (frame/dispatch/clear). */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	dev_priv->sarea_priv->last_frame = dev_priv->scratch[0] = 0;
	RADEON_WRITE(R600_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame);

	dev_priv->sarea_priv->last_dispatch = dev_priv->scratch[1] = 0;
	RADEON_WRITE(R600_LAST_DISPATCH_REG,
		     dev_priv->sarea_priv->last_dispatch);

	dev_priv->sarea_priv->last_clear = dev_priv->scratch[2] = 0;
	RADEON_WRITE(R600_LAST_CLEAR_REG, dev_priv->sarea_priv->last_clear);

	r600_do_wait_for_idle(dev_priv);

}
   1819 
/* Tear down CP state: disable interrupts, release the AGP or PCI GART
 * mappings, and zero the driver-private structure up to (but not
 * including) the persistent 'flags' field.  Safe to call on partially
 * initialized state; used as the error-unwind path by r600_do_init_cp().
 * Always returns 0.
 */
int r600_do_cleanup_cp(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	/* Make sure interrupts are disabled here because the uninstall ioctl
	 * may not have been called from userspace and after dev_private
	 * is freed, it's too late.
	 */
	if (dev->irq_enabled)
		drm_irq_uninstall(dev);

#if __OS_HAS_AGP
	/* AGP path: the three maps were ioremapped in r600_do_init_cp();
	 * each is released and NULLed individually since init may have
	 * failed partway through. */
	if (dev_priv->flags & RADEON_IS_AGP) {
		if (dev_priv->cp_ring != NULL) {
			drm_core_ioremapfree(dev_priv->cp_ring, dev);
			dev_priv->cp_ring = NULL;
		}
		if (dev_priv->ring_rptr != NULL) {
			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
			dev_priv->ring_rptr = NULL;
		}
		if (dev->agp_buffer_map != NULL) {
			drm_core_ioremapfree(dev->agp_buffer_map, dev);
			dev->agp_buffer_map = NULL;
		}
	} else
#endif
	{

		/* PCI(E) path: free the GART page table before unmapping
		 * the region that backs it. */
		if (dev_priv->gart_info.mapping.handle) {
			r600_page_table_cleanup(dev, &dev_priv->gart_info);
			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
			dev_priv->gart_info.addr = 0;
			dev_priv->gart_info.mapping.handle = 0;
		}
	}
	/* only clear to the start of flags */
	memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags));

	return 0;
}
   1862 
/* Full CP initialization from the RADEON_CP_INIT ioctl parameters:
 * validates the init request, resolves the SAREA and ring/buffer maps,
 * computes the framebuffer and GART memory layout, sets up the GART
 * page table and VM, loads CP microcode, and starts the ring.
 *
 * Returns 0 on success or -EINVAL on any validation/setup failure;
 * every failure path unwinds through r600_do_cleanup_cp().
 */
int r600_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");

	/* if we require new memory map but we don't have it fail */
	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Reconcile the bus-type flags with what userspace requested. */
	if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP))
	{
		DRM_DEBUG("Forcing AGP card to PCI mode\n");
		dev_priv->flags &= ~RADEON_IS_AGP;
		/* The writeback test succeeds, but when writeback is enabled,
		 * the ring buffer read ptr update fails after first 128 bytes.
		 */
		radeon_no_wb = 1;
	}
	else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE))
		 && !init->is_pci)
	{
		DRM_DEBUG("Restoring AGP flag\n");
		dev_priv->flags |= RADEON_IS_AGP;
	}

	dev_priv->usec_timeout = init->usec_timeout;
	if (dev_priv->usec_timeout < 1 ||
	    dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) {
		DRM_DEBUG("TIMEOUT problem!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Enable vblank on CRTC1 for older X servers
	 */
	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;

	dev_priv->do_boxes = 0;
	dev_priv->cp_mode = init->cp_mode;

	/* We don't support anything other than bus-mastering ring mode,
	 * but the ring can be in either AGP or PCI space for the ring
	 * read pointer.
	 */
	if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) &&
	    (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) {
		DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode);
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	/* Color format for blits/swaps; 32 bpp is the default. */
	switch (init->fb_bpp) {
	case 16:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565;
		break;
	case 32:
	default:
		dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
		break;
	}
	dev_priv->front_offset = init->front_offset;
	dev_priv->front_pitch = init->front_pitch;
	dev_priv->back_offset = init->back_offset;
	dev_priv->back_pitch = init->back_pitch;

	dev_priv->ring_offset = init->ring_offset;
	dev_priv->ring_rptr_offset = init->ring_rptr_offset;
	dev_priv->buffers_offset = init->buffers_offset;
	dev_priv->gart_textures_offset = init->gart_textures_offset;

	/* Resolve all the map handles userspace passed in by offset. */
	dev_priv->sarea = drm_getsarea(dev);
	if (!dev_priv->sarea) {
		DRM_ERROR("could not find sarea!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
	if (!dev_priv->cp_ring) {
		DRM_ERROR("could not find cp ring region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
	if (!dev_priv->ring_rptr) {
		DRM_ERROR("could not find ring read pointer!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}
	dev->agp_buffer_token = init->buffers_offset;
	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
	if (!dev->agp_buffer_map) {
		DRM_ERROR("could not find dma buffer region!\n");
		r600_do_cleanup_cp(dev);
		return -EINVAL;
	}

	if (init->gart_textures_offset) {
		dev_priv->gart_textures =
		    drm_core_findmap(dev, init->gart_textures_offset);
		if (!dev_priv->gart_textures) {
			DRM_ERROR("could not find GART texture region!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	}

	dev_priv->sarea_priv =
	    (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle +
				    init->sarea_priv_offset);

#if __OS_HAS_AGP
	// XXX
	/* AGP: CPU access to ring/rptr/buffers goes through write-combined
	 * ioremaps; these are released in r600_do_cleanup_cp(). */
	if (dev_priv->flags & RADEON_IS_AGP) {
		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
		if (!dev_priv->cp_ring->handle ||
		    !dev_priv->ring_rptr->handle ||
		    !dev->agp_buffer_map->handle) {
			DRM_ERROR("could not find ioremap agp regions!\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}
	} else
#endif
	{
		/* Non-AGP: the maps are already CPU-addressable; the handle
		 * is simply the map offset. */
		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
		dev_priv->ring_rptr->handle =
		    (void *)dev_priv->ring_rptr->offset;
		dev->agp_buffer_map->handle =
		    (void *)dev->agp_buffer_map->offset;

		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
			  dev_priv->cp_ring->handle);
		DRM_DEBUG("dev_priv->ring_rptr->handle %p\n",
			  dev_priv->ring_rptr->handle);
		DRM_DEBUG("dev->agp_buffer_map->handle %p\n",
			  dev->agp_buffer_map->handle);
	}

	/* Decode the MC register: low 16 bits are the FB base (in 16 MB
	 * units, hence << 24), high 16 bits give the FB top. */
	dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24;
	dev_priv->fb_size =
		(((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000)
		- dev_priv->fb_location;

	/* pitch_offset packs pitch (in 64-byte units) in the top 10 bits
	 * and the card-space offset (in 1 KB units) in the low bits. */
	dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) |
					((dev_priv->front_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) |
				       ((dev_priv->back_offset
					 + dev_priv->fb_location) >> 10));

	dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) |
					((dev_priv->depth_offset
					  + dev_priv->fb_location) >> 10));

	dev_priv->gart_size = init->gart_size;

	/* New let's set the memory map ... */
	if (dev_priv->new_memmap) {
		u32 base = 0;

		DRM_INFO("Setting GART location based on new memory map\n");

		/* If using AGP, try to locate the AGP aperture at the same
		 * location in the card and on the bus, though we have to
		 * align it down.
		 */
#if __OS_HAS_AGP
		// XXX
		if (dev_priv->flags & RADEON_IS_AGP) {
			base = dev->agp->base;
			/* Check if valid */
			if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location &&
			    base < (dev_priv->fb_location + dev_priv->fb_size - 1)) {
				DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n",
					 dev->agp->base);
				base = 0;
			}
		}
#endif
		/* If not or if AGP is at 0 (Macs), try to put it elsewhere */
		if (base == 0) {
			base = dev_priv->fb_location + dev_priv->fb_size;
			if (base < dev_priv->fb_location ||
			    ((base + dev_priv->gart_size) & 0xfffffffful) < base)
				base = dev_priv->fb_location
					- dev_priv->gart_size;
		}
		/* GART base must be 4 MB aligned. */
		dev_priv->gart_vm_start = base & 0xffc00000u;
		if (dev_priv->gart_vm_start != base)
			DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n",
				 base, dev_priv->gart_vm_start);
	}

	/* Card-space address of the DMA buffer pool. */
#if __OS_HAS_AGP
	// XXX
	if (dev_priv->flags & RADEON_IS_AGP)
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - dev->agp->base
						 + dev_priv->gart_vm_start);
	else
#endif
		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
						 - (unsigned long)dev->sg->virtual
						 + dev_priv->gart_vm_start);

	DRM_DEBUG("fb 0x%08x size %d\n",
	          (unsigned int) dev_priv->fb_location,
	          (unsigned int) dev_priv->fb_size);
	DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size);
	DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n",
	          (unsigned int) dev_priv->gart_vm_start);
	DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n",
	          dev_priv->gart_buffers_offset);

	/* Software ring bookkeeping; sizes are stored as log2 of
	 * quad-words (hence the / 8) to match the RB_CNTL encoding. */
	dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle;
	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
			      + init->ring_size / sizeof(u32));
	dev_priv->ring.size = init->ring_size;
	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);

	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
	dev_priv->ring.rptr_update_l2qw = drm_order( /* init->rptr_update */ 4096 / 8);

	/* NOTE(review): field name reads "l2ow"; presumably log2 of
	 * octo-words (/ 16) -- declared elsewhere, cannot rename here. */
	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
	dev_priv->ring.fetch_size_l2ow = drm_order( /* init->fetch_size */ 32 / 16);

	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;

	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;

#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		// XXX turn off pcie gart
	} else
#endif
	{
		/* PCI(E): map and populate the GART page table, then bring
		 * up the per-family VM. */
		dev_priv->gart_info.table_mask = DMA_BIT_MASK(32);
		/* if we have an offset set from userspace */
		if (!dev_priv->pcigart_offset_set) {
			DRM_ERROR("Need gart offset from userspace\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset);

		dev_priv->gart_info.bus_addr =
			dev_priv->pcigart_offset + dev_priv->fb_location;
		dev_priv->gart_info.mapping.offset =
			dev_priv->pcigart_offset + dev_priv->fb_aper_offset;
		dev_priv->gart_info.mapping.size =
			dev_priv->gart_info.table_size;

		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
		if (!dev_priv->gart_info.mapping.handle) {
			DRM_ERROR("ioremap failed.\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		dev_priv->gart_info.addr =
			dev_priv->gart_info.mapping.handle;

		DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n",
			  dev_priv->gart_info.addr,
			  dev_priv->pcigart_offset);

		if (!r600_page_table_init(dev)) {
			DRM_ERROR("Failed to init GART table\n");
			r600_do_cleanup_cp(dev);
			return -EINVAL;
		}

		if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
			r700_vm_init(dev);
		else
			r600_vm_init(dev);
	}

	/* Load microcode and bring up the ring for the right family. */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
	    r700_cp_load_microcode(dev_priv);
	else
	    r600_cp_load_microcode(dev_priv);

	r600_cp_init_ring_buffer(dev, dev_priv);

	dev_priv->last_buf = 0;

	r600_do_engine_reset(dev);
	r600_test_writeback(dev_priv);

	r600_cs_init(dev);

	return 0;
}
   2166 
   2167 int r600_do_resume_cp(struct drm_device * dev)
   2168 {
   2169 	drm_radeon_private_t *dev_priv = dev->dev_private;
   2170 
   2171 	DRM_DEBUG("\n");
   2172 	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
   2173 		r700_vm_init(dev);
   2174 		r700_cp_load_microcode(dev_priv);
   2175 	} else {
   2176 		r600_vm_init(dev);
   2177 		r600_cp_load_microcode(dev_priv);
   2178 	}
   2179 	r600_cp_init_ring_buffer(dev, dev_priv);
   2180 	r600_do_engine_reset(dev);
   2181 
   2182 	return 0;
   2183 }
   2184 
/* Wait for the CP to go idle.
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Emit a 5-dword sequence: an EVENT_WRITE that flushes and
	 * invalidates the caches, then a config-register write to
	 * R600_WAIT_UNTIL requesting 3D idle-clean.  Dword order is
	 * the packet format; do not reorder. */
	BEGIN_RING(5);
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	/* register writes are addressed relative to the config-reg base */
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	/* Then poll until the hardware actually reports idle. */
	return r600_do_wait_for_idle(dev_priv);
}
   2205 
   2206 /* Start the Command Processor.
   2207  */
   2208 void r600_do_cp_start(drm_radeon_private_t * dev_priv)
   2209 {
   2210 	u32 cp_me;
   2211 	RING_LOCALS;
   2212 	DRM_DEBUG("\n");
   2213 
   2214 	BEGIN_RING(7);
   2215 	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
   2216 	OUT_RING(0x00000001);
   2217 	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_CEDAR)) {
   2218 		OUT_RING(0x00000000);
   2219 		/* XXXMRG add/use evergreen_max_hw_contexts */
   2220 		OUT_RING((dev_priv->r600_max_hw_contexts - 1));
   2221 	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
   2222 		OUT_RING(0x00000000);
   2223 		/* XXXMRG add/use rv770_max_hw_contexts */
   2224 		OUT_RING((dev_priv->r600_max_hw_contexts - 1));
   2225         } else {
   2226 		OUT_RING(0x00000003);
   2227 		OUT_RING((dev_priv->r600_max_hw_contexts - 1));
   2228 	}
   2229 	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
   2230 	OUT_RING(0x00000000);
   2231 	OUT_RING(0x00000000);
   2232 	ADVANCE_RING();
   2233         COMMIT_RING();
   2234 
   2235 	/* set the mux and reset the halt bit */
   2236 	cp_me = 0xff;
   2237 	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
   2238 
   2239 	dev_priv->cp_running = 1;
   2240 
   2241 }
   2242 
   2243 void r600_do_cp_reset(drm_radeon_private_t * dev_priv)
   2244 {
   2245 	u32 cur_read_ptr;
   2246 	DRM_DEBUG("\n");
   2247 
   2248 	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
   2249 	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
   2250 	SET_RING_HEAD(dev_priv, cur_read_ptr);
   2251 	dev_priv->ring.tail = cur_read_ptr;
   2252 }
   2253 
   2254 void r600_do_cp_stop(drm_radeon_private_t * dev_priv)
   2255 {
   2256 	uint32_t cp_me;
   2257 
   2258 	DRM_DEBUG("\n");
   2259 
   2260 	cp_me = 0xff | R600_CP_ME_HALT;
   2261 
   2262 	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);
   2263 
   2264 	dev_priv->cp_running = 0;
   2265 }
   2266 
   2267 int r600_cp_dispatch_indirect(struct drm_device *dev,
   2268 			      struct drm_buf *buf, int start, int end)
   2269 {
   2270 	drm_radeon_private_t *dev_priv = dev->dev_private;
   2271 	RING_LOCALS;
   2272 
   2273 	if (start != end) {
   2274 		unsigned long offset = (dev_priv->gart_buffers_offset
   2275 					+ buf->offset + start);
   2276 		int dwords = (end - start + 3) / sizeof(u32);
   2277 
   2278 		DRM_DEBUG("dwords:%d\n", dwords);
   2279 		DRM_DEBUG("offset 0x%lx\n", offset);
   2280 
   2281 
   2282 		/* Indirect buffer data must be a multiple of 16 dwords.
   2283 		 * pad the data with a Type-2 CP packet.
   2284 		 */
   2285 		while (dwords & 0xf) {
   2286 			u32 *data = (u32 *)
   2287 			    ((char *)dev->agp_buffer_map->handle
   2288 			     + buf->offset + start);
   2289 			data[dwords++] = RADEON_CP_PACKET2;
   2290 		}
   2291 
   2292 		/* Fire off the indirect buffer */
   2293 		BEGIN_RING(4);
   2294 		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
   2295 		OUT_RING((offset & 0xfffffffc));
   2296 		OUT_RING((upper_32_bits(offset) & 0xff));
   2297 		OUT_RING(dwords);
   2298 		ADVANCE_RING();
   2299 	}
   2300 
   2301 	return 0;
   2302 }
   2303 
/* Copy the back buffer to the front buffer (or vice versa when page
 * flipped) for every clip rectangle in the SAREA, using the blit
 * engine, then bump the frame-age counter that clients use for
 * frame throttling.
 */
void r600_cp_dispatch_swap(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i, cpp, src_pitch, dst_pitch;
	uint64_t src, dst;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Bytes per pixel follows the color format chosen at init. */
	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
		cpp = 4;
	else
		cpp = 2;

	/* pfCurrentPage selects which surface is currently displayed:
	 * copy from the hidden surface to the visible one. */
	if (dev_priv->sarea_priv->pfCurrentPage == 0) {
		src_pitch = dev_priv->back_pitch;
		dst_pitch = dev_priv->front_pitch;
		src = dev_priv->back_offset + dev_priv->fb_location;
		dst = dev_priv->front_offset + dev_priv->fb_location;
	} else {
		src_pitch = dev_priv->front_pitch;
		dst_pitch = dev_priv->back_pitch;
		src = dev_priv->front_offset + dev_priv->fb_location;
		dst = dev_priv->back_offset + dev_priv->fb_location;
	}

	/* Blit state setup can fail (vertex buffer allocation). */
	if (r600_prepare_blit_copy(dev)) {
		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
		return;
	}
	/* One blit per clip rectangle. */
	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		r600_blit_swap(dev,
			       src, dst,
			       x, y, x, y, w, h,
			       src_pitch, dst_pitch, cpp);
	}
	r600_done_blit_copy(dev);

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(3);
	R600_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	ADVANCE_RING();
}
   2361 
   2362 int r600_cp_dispatch_texture(struct drm_device * dev,
   2363 			     struct drm_file *file_priv,
   2364 			     drm_radeon_texture_t * tex,
   2365 			     drm_radeon_tex_image_t * image)
   2366 {
   2367 	drm_radeon_private_t *dev_priv = dev->dev_private;
   2368 	struct drm_buf *buf;
   2369 	u32 *buffer;
   2370 	const u8 __user *data;
   2371 	int size, pass_size;
   2372 	u64 src_offset, dst_offset;
   2373 
   2374 	if (!radeon_check_offset(dev_priv, tex->offset)) {
   2375 		DRM_ERROR("Invalid destination offset\n");
   2376 		return -EINVAL;
   2377 	}
   2378 
   2379 	/* this might fail for zero-sized uploads - are those illegal? */
   2380 	if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
   2381 		DRM_ERROR("Invalid final destination offset\n");
   2382 		return -EINVAL;
   2383 	}
   2384 
   2385 	size = tex->height * tex->pitch;
   2386 
   2387 	if (size == 0)
   2388 		return 0;
   2389 
   2390 	dst_offset = tex->offset;
   2391 
   2392 	r600_prepare_blit_copy(dev);
   2393 	do {
   2394 		data = (const u8 __user *)image->data;
   2395 		pass_size = size;
   2396 
   2397 		buf = radeon_freelist_get(dev);
   2398 		if (!buf) {
   2399 			DRM_DEBUG("EAGAIN\n");
   2400 			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
   2401 				return -EFAULT;
   2402 			return -EAGAIN;
   2403 		}
   2404 
   2405 		if (pass_size > buf->total)
   2406 			pass_size = buf->total;
   2407 
   2408 		/* Dispatch the indirect buffer.
   2409 		 */
   2410 		buffer =
   2411 		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
   2412 
   2413 		if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
   2414 			DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
   2415 			return -EFAULT;
   2416 		}
   2417 
   2418 		buf->file_priv = file_priv;
   2419 		buf->used = pass_size;
   2420 		src_offset = dev_priv->gart_buffers_offset + buf->offset;
   2421 
   2422 		r600_blit_copy(dev, src_offset, dst_offset, pass_size);
   2423 
   2424 		radeon_cp_discard_buffer(dev, buf);
   2425 
   2426 		/* Update the input parameters for next time */
   2427 		image->data = (const u8 __user *)image->data + pass_size;
   2428 		dst_offset += pass_size;
   2429 		size -= pass_size;
   2430 	} while (size > 0);
   2431 	r600_done_blit_copy(dev);
   2432 
   2433 	return 0;
   2434 }
   2435