/* Home | History | Annotate | Line # | Download | only in radeon
   radeon_cik.c revision 1.1.6.2 */
      1 /*	$NetBSD: radeon_cik.c,v 1.1.6.2 2019/06/10 22:08:26 christos Exp $	*/
      2 
      3 /*
      4  * Copyright 2012 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors: Alex Deucher
     25  */
     26 #include <sys/cdefs.h>
     27 __KERNEL_RCSID(0, "$NetBSD: radeon_cik.c,v 1.1.6.2 2019/06/10 22:08:26 christos Exp $");
     28 
     29 #include <linux/firmware.h>
     30 #include <linux/slab.h>
     31 #include <linux/module.h>
     32 #include "drmP.h"
     33 #include "radeon.h"
     34 #include "radeon_asic.h"
     35 #include "radeon_audio.h"
     36 #include "cikd.h"
     37 #include "atom.h"
     38 #include "cik_blit_shaders.h"
     39 #include "radeon_ucode.h"
     40 #include "clearstate_ci.h"
     41 #include "radeon_kfd.h"
     42 
     43 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
     44 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
     45 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
     46 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
     47 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
     48 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
     49 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
     50 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
     51 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
     52 
     53 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
     54 MODULE_FIRMWARE("radeon/bonaire_me.bin");
     55 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
     56 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
     57 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
     58 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
     59 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
     60 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
     61 
     62 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
     63 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
     64 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
     65 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
     66 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
     67 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
     68 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
     69 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
     70 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
     71 
     72 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
     73 MODULE_FIRMWARE("radeon/hawaii_me.bin");
     74 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
     75 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
     76 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
     77 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
     78 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
     79 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
     80 
     81 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
     82 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
     83 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
     84 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
     85 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
     86 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
     87 
     88 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
     89 MODULE_FIRMWARE("radeon/kaveri_me.bin");
     90 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
     91 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
     92 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
     93 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
     94 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
     95 
     96 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
     97 MODULE_FIRMWARE("radeon/KABINI_me.bin");
     98 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
     99 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
    100 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
    101 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
    102 
    103 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
    104 MODULE_FIRMWARE("radeon/kabini_me.bin");
    105 MODULE_FIRMWARE("radeon/kabini_ce.bin");
    106 MODULE_FIRMWARE("radeon/kabini_mec.bin");
    107 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
    108 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
    109 
    110 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
    111 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
    112 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
    113 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
    114 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
    115 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
    116 
    117 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
    118 MODULE_FIRMWARE("radeon/mullins_me.bin");
    119 MODULE_FIRMWARE("radeon/mullins_ce.bin");
    120 MODULE_FIRMWARE("radeon/mullins_mec.bin");
    121 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
    122 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
    123 
    124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
    125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
    126 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
    127 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
    128 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
    129 extern void sumo_rlc_fini(struct radeon_device *rdev);
    130 extern int sumo_rlc_init(struct radeon_device *rdev);
    131 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
    132 extern void si_rlc_reset(struct radeon_device *rdev);
    133 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
    134 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
    135 extern int cik_sdma_resume(struct radeon_device *rdev);
    136 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
    137 extern void cik_sdma_fini(struct radeon_device *rdev);
    138 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
    139 static void cik_rlc_stop(struct radeon_device *rdev);
    140 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
    141 static void cik_program_aspm(struct radeon_device *rdev);
    142 static void cik_init_pg(struct radeon_device *rdev);
    143 static void cik_init_cg(struct radeon_device *rdev);
    144 static void cik_fini_pg(struct radeon_device *rdev);
    145 static void cik_fini_cg(struct radeon_device *rdev);
    146 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
    147 					  bool enable);
    148 
    149 /**
    150  * cik_get_allowed_info_register - fetch the register for the info ioctl
    151  *
    152  * @rdev: radeon_device pointer
    153  * @reg: register offset in bytes
    154  * @val: register value
    155  *
    156  * Returns 0 for success or -EINVAL for an invalid register
    157  *
    158  */
    159 int cik_get_allowed_info_register(struct radeon_device *rdev,
    160 				  u32 reg, u32 *val)
    161 {
    162 	switch (reg) {
    163 	case GRBM_STATUS:
    164 	case GRBM_STATUS2:
    165 	case GRBM_STATUS_SE0:
    166 	case GRBM_STATUS_SE1:
    167 	case GRBM_STATUS_SE2:
    168 	case GRBM_STATUS_SE3:
    169 	case SRBM_STATUS:
    170 	case SRBM_STATUS2:
    171 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
    172 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
    173 	case UVD_STATUS:
    174 	/* TODO VCE */
    175 		*val = RREG32(reg);
    176 		return 0;
    177 	default:
    178 		return -EINVAL;
    179 	}
    180 }
    181 
    182 /*
    183  * Indirect registers accessor
    184  */
    185 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
    186 {
    187 	unsigned long flags;
    188 	u32 r;
    189 
    190 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
    191 	WREG32(CIK_DIDT_IND_INDEX, (reg));
    192 	r = RREG32(CIK_DIDT_IND_DATA);
    193 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
    194 	return r;
    195 }
    196 
    197 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
    198 {
    199 	unsigned long flags;
    200 
    201 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
    202 	WREG32(CIK_DIDT_IND_INDEX, (reg));
    203 	WREG32(CIK_DIDT_IND_DATA, (v));
    204 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
    205 }
    206 
    207 /* get temperature in millidegrees */
    208 int ci_get_temp(struct radeon_device *rdev)
    209 {
    210 	u32 temp;
    211 	int actual_temp = 0;
    212 
    213 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
    214 		CTF_TEMP_SHIFT;
    215 
    216 	if (temp & 0x200)
    217 		actual_temp = 255;
    218 	else
    219 		actual_temp = temp & 0x1ff;
    220 
    221 	actual_temp = actual_temp * 1000;
    222 
    223 	return actual_temp;
    224 }
    225 
    226 /* get temperature in millidegrees */
    227 int kv_get_temp(struct radeon_device *rdev)
    228 {
    229 	u32 temp;
    230 	int actual_temp = 0;
    231 
    232 	temp = RREG32_SMC(0xC0300E0C);
    233 
    234 	if (temp)
    235 		actual_temp = (temp / 8) - 49;
    236 	else
    237 		actual_temp = 0;
    238 
    239 	actual_temp = actual_temp * 1000;
    240 
    241 	return actual_temp;
    242 }
    243 
    244 /*
    245  * Indirect registers accessor
    246  */
    247 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
    248 {
    249 	unsigned long flags;
    250 	u32 r;
    251 
    252 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
    253 	WREG32(PCIE_INDEX, reg);
    254 	(void)RREG32(PCIE_INDEX);
    255 	r = RREG32(PCIE_DATA);
    256 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
    257 	return r;
    258 }
    259 
    260 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
    261 {
    262 	unsigned long flags;
    263 
    264 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
    265 	WREG32(PCIE_INDEX, reg);
    266 	(void)RREG32(PCIE_INDEX);
    267 	WREG32(PCIE_DATA, v);
    268 	(void)RREG32(PCIE_DATA);
    269 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
    270 }
    271 
/*
 * RLC save/restore register list for Spectre (Kaveri GFX) parts.
 *
 * NOTE(review): format inferred from the data shape -- confirm against
 * the RLC microcode documentation.  Entries appear to come in pairs:
 *   ((GRBM_GFX_INDEX selector) << 16) | (register byte offset >> 2)
 * followed by a placeholder value (0x00000000) to be filled at save
 * time.  Bare small constants (0x3, 0x5 below) seem to introduce
 * specially-handled runs rather than selector/offset pairs.
 * Do NOT edit individual values without a hardware reference.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	/* 0xcd20 sampled once per-SE/SH selector (0x4e00..0xbe00). */
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): run marker? see header comment */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	/* 0xc900..0xc910 sampled per-SE/SH selector (0x4e00..0xbe00). */
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	/* 0x31068 sampled broadcast (0x0e00) plus per-SE/SH selectors. */
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): run marker? no placeholder values follow */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
    718 
    719 static const u32 kalindi_rlc_save_restore_register_list[] =
    720 {
    721 	(0x0e00 << 16) | (0xc12c >> 2),
    722 	0x00000000,
    723 	(0x0e00 << 16) | (0xc140 >> 2),
    724 	0x00000000,
    725 	(0x0e00 << 16) | (0xc150 >> 2),
    726 	0x00000000,
    727 	(0x0e00 << 16) | (0xc15c >> 2),
    728 	0x00000000,
    729 	(0x0e00 << 16) | (0xc168 >> 2),
    730 	0x00000000,
    731 	(0x0e00 << 16) | (0xc170 >> 2),
    732 	0x00000000,
    733 	(0x0e00 << 16) | (0xc204 >> 2),
    734 	0x00000000,
    735 	(0x0e00 << 16) | (0xc2b4 >> 2),
    736 	0x00000000,
    737 	(0x0e00 << 16) | (0xc2b8 >> 2),
    738 	0x00000000,
    739 	(0x0e00 << 16) | (0xc2bc >> 2),
    740 	0x00000000,
    741 	(0x0e00 << 16) | (0xc2c0 >> 2),
    742 	0x00000000,
    743 	(0x0e00 << 16) | (0x8228 >> 2),
    744 	0x00000000,
    745 	(0x0e00 << 16) | (0x829c >> 2),
    746 	0x00000000,
    747 	(0x0e00 << 16) | (0x869c >> 2),
    748 	0x00000000,
    749 	(0x0600 << 16) | (0x98f4 >> 2),
    750 	0x00000000,
    751 	(0x0e00 << 16) | (0x98f8 >> 2),
    752 	0x00000000,
    753 	(0x0e00 << 16) | (0x9900 >> 2),
    754 	0x00000000,
    755 	(0x0e00 << 16) | (0xc260 >> 2),
    756 	0x00000000,
    757 	(0x0e00 << 16) | (0x90e8 >> 2),
    758 	0x00000000,
    759 	(0x0e00 << 16) | (0x3c000 >> 2),
    760 	0x00000000,
    761 	(0x0e00 << 16) | (0x3c00c >> 2),
    762 	0x00000000,
    763 	(0x0e00 << 16) | (0x8c1c >> 2),
    764 	0x00000000,
    765 	(0x0e00 << 16) | (0x9700 >> 2),
    766 	0x00000000,
    767 	(0x0e00 << 16) | (0xcd20 >> 2),
    768 	0x00000000,
    769 	(0x4e00 << 16) | (0xcd20 >> 2),
    770 	0x00000000,
    771 	(0x5e00 << 16) | (0xcd20 >> 2),
    772 	0x00000000,
    773 	(0x6e00 << 16) | (0xcd20 >> 2),
    774 	0x00000000,
    775 	(0x7e00 << 16) | (0xcd20 >> 2),
    776 	0x00000000,
    777 	(0x0e00 << 16) | (0x89bc >> 2),
    778 	0x00000000,
    779 	(0x0e00 << 16) | (0x8900 >> 2),
    780 	0x00000000,
    781 	0x3,
    782 	(0x0e00 << 16) | (0xc130 >> 2),
    783 	0x00000000,
    784 	(0x0e00 << 16) | (0xc134 >> 2),
    785 	0x00000000,
    786 	(0x0e00 << 16) | (0xc1fc >> 2),
    787 	0x00000000,
    788 	(0x0e00 << 16) | (0xc208 >> 2),
    789 	0x00000000,
    790 	(0x0e00 << 16) | (0xc264 >> 2),
    791 	0x00000000,
    792 	(0x0e00 << 16) | (0xc268 >> 2),
    793 	0x00000000,
    794 	(0x0e00 << 16) | (0xc26c >> 2),
    795 	0x00000000,
    796 	(0x0e00 << 16) | (0xc270 >> 2),
    797 	0x00000000,
    798 	(0x0e00 << 16) | (0xc274 >> 2),
    799 	0x00000000,
    800 	(0x0e00 << 16) | (0xc28c >> 2),
    801 	0x00000000,
    802 	(0x0e00 << 16) | (0xc290 >> 2),
    803 	0x00000000,
    804 	(0x0e00 << 16) | (0xc294 >> 2),
    805 	0x00000000,
    806 	(0x0e00 << 16) | (0xc298 >> 2),
    807 	0x00000000,
    808 	(0x0e00 << 16) | (0xc2a0 >> 2),
    809 	0x00000000,
    810 	(0x0e00 << 16) | (0xc2a4 >> 2),
    811 	0x00000000,
    812 	(0x0e00 << 16) | (0xc2a8 >> 2),
    813 	0x00000000,
    814 	(0x0e00 << 16) | (0xc2ac >> 2),
    815 	0x00000000,
    816 	(0x0e00 << 16) | (0x301d0 >> 2),
    817 	0x00000000,
    818 	(0x0e00 << 16) | (0x30238 >> 2),
    819 	0x00000000,
    820 	(0x0e00 << 16) | (0x30250 >> 2),
    821 	0x00000000,
    822 	(0x0e00 << 16) | (0x30254 >> 2),
    823 	0x00000000,
    824 	(0x0e00 << 16) | (0x30258 >> 2),
    825 	0x00000000,
    826 	(0x0e00 << 16) | (0x3025c >> 2),
    827 	0x00000000,
    828 	(0x4e00 << 16) | (0xc900 >> 2),
    829 	0x00000000,
    830 	(0x5e00 << 16) | (0xc900 >> 2),
    831 	0x00000000,
    832 	(0x6e00 << 16) | (0xc900 >> 2),
    833 	0x00000000,
    834 	(0x7e00 << 16) | (0xc900 >> 2),
    835 	0x00000000,
    836 	(0x4e00 << 16) | (0xc904 >> 2),
    837 	0x00000000,
    838 	(0x5e00 << 16) | (0xc904 >> 2),
    839 	0x00000000,
    840 	(0x6e00 << 16) | (0xc904 >> 2),
    841 	0x00000000,
    842 	(0x7e00 << 16) | (0xc904 >> 2),
    843 	0x00000000,
    844 	(0x4e00 << 16) | (0xc908 >> 2),
    845 	0x00000000,
    846 	(0x5e00 << 16) | (0xc908 >> 2),
    847 	0x00000000,
    848 	(0x6e00 << 16) | (0xc908 >> 2),
    849 	0x00000000,
    850 	(0x7e00 << 16) | (0xc908 >> 2),
    851 	0x00000000,
    852 	(0x4e00 << 16) | (0xc90c >> 2),
    853 	0x00000000,
    854 	(0x5e00 << 16) | (0xc90c >> 2),
    855 	0x00000000,
    856 	(0x6e00 << 16) | (0xc90c >> 2),
    857 	0x00000000,
    858 	(0x7e00 << 16) | (0xc90c >> 2),
    859 	0x00000000,
    860 	(0x4e00 << 16) | (0xc910 >> 2),
    861 	0x00000000,
    862 	(0x5e00 << 16) | (0xc910 >> 2),
    863 	0x00000000,
    864 	(0x6e00 << 16) | (0xc910 >> 2),
    865 	0x00000000,
    866 	(0x7e00 << 16) | (0xc910 >> 2),
    867 	0x00000000,
    868 	(0x0e00 << 16) | (0xc99c >> 2),
    869 	0x00000000,
    870 	(0x0e00 << 16) | (0x9834 >> 2),
    871 	0x00000000,
    872 	(0x0000 << 16) | (0x30f00 >> 2),
    873 	0x00000000,
    874 	(0x0000 << 16) | (0x30f04 >> 2),
    875 	0x00000000,
    876 	(0x0000 << 16) | (0x30f08 >> 2),
    877 	0x00000000,
    878 	(0x0000 << 16) | (0x30f0c >> 2),
    879 	0x00000000,
    880 	(0x0600 << 16) | (0x9b7c >> 2),
    881 	0x00000000,
    882 	(0x0e00 << 16) | (0x8a14 >> 2),
    883 	0x00000000,
    884 	(0x0e00 << 16) | (0x8a18 >> 2),
    885 	0x00000000,
    886 	(0x0600 << 16) | (0x30a00 >> 2),
    887 	0x00000000,
    888 	(0x0e00 << 16) | (0x8bf0 >> 2),
    889 	0x00000000,
    890 	(0x0e00 << 16) | (0x8bcc >> 2),
    891 	0x00000000,
    892 	(0x0e00 << 16) | (0x8b24 >> 2),
    893 	0x00000000,
    894 	(0x0e00 << 16) | (0x30a04 >> 2),
    895 	0x00000000,
    896 	(0x0600 << 16) | (0x30a10 >> 2),
    897 	0x00000000,
    898 	(0x0600 << 16) | (0x30a14 >> 2),
    899 	0x00000000,
    900 	(0x0600 << 16) | (0x30a18 >> 2),
    901 	0x00000000,
    902 	(0x0600 << 16) | (0x30a2c >> 2),
    903 	0x00000000,
    904 	(0x0e00 << 16) | (0xc700 >> 2),
    905 	0x00000000,
    906 	(0x0e00 << 16) | (0xc704 >> 2),
    907 	0x00000000,
    908 	(0x0e00 << 16) | (0xc708 >> 2),
    909 	0x00000000,
    910 	(0x0e00 << 16) | (0xc768 >> 2),
    911 	0x00000000,
    912 	(0x0400 << 16) | (0xc770 >> 2),
    913 	0x00000000,
    914 	(0x0400 << 16) | (0xc774 >> 2),
    915 	0x00000000,
    916 	(0x0400 << 16) | (0xc798 >> 2),
    917 	0x00000000,
    918 	(0x0400 << 16) | (0xc79c >> 2),
    919 	0x00000000,
    920 	(0x0e00 << 16) | (0x9100 >> 2),
    921 	0x00000000,
    922 	(0x0e00 << 16) | (0x3c010 >> 2),
    923 	0x00000000,
    924 	(0x0e00 << 16) | (0x8c00 >> 2),
    925 	0x00000000,
    926 	(0x0e00 << 16) | (0x8c04 >> 2),
    927 	0x00000000,
    928 	(0x0e00 << 16) | (0x8c20 >> 2),
    929 	0x00000000,
    930 	(0x0e00 << 16) | (0x8c38 >> 2),
    931 	0x00000000,
    932 	(0x0e00 << 16) | (0x8c3c >> 2),
    933 	0x00000000,
    934 	(0x0e00 << 16) | (0xae00 >> 2),
    935 	0x00000000,
    936 	(0x0e00 << 16) | (0x9604 >> 2),
    937 	0x00000000,
    938 	(0x0e00 << 16) | (0xac08 >> 2),
    939 	0x00000000,
    940 	(0x0e00 << 16) | (0xac0c >> 2),
    941 	0x00000000,
    942 	(0x0e00 << 16) | (0xac10 >> 2),
    943 	0x00000000,
    944 	(0x0e00 << 16) | (0xac14 >> 2),
    945 	0x00000000,
    946 	(0x0e00 << 16) | (0xac58 >> 2),
    947 	0x00000000,
    948 	(0x0e00 << 16) | (0xac68 >> 2),
    949 	0x00000000,
    950 	(0x0e00 << 16) | (0xac6c >> 2),
    951 	0x00000000,
    952 	(0x0e00 << 16) | (0xac70 >> 2),
    953 	0x00000000,
    954 	(0x0e00 << 16) | (0xac74 >> 2),
    955 	0x00000000,
    956 	(0x0e00 << 16) | (0xac78 >> 2),
    957 	0x00000000,
    958 	(0x0e00 << 16) | (0xac7c >> 2),
    959 	0x00000000,
    960 	(0x0e00 << 16) | (0xac80 >> 2),
    961 	0x00000000,
    962 	(0x0e00 << 16) | (0xac84 >> 2),
    963 	0x00000000,
    964 	(0x0e00 << 16) | (0xac88 >> 2),
    965 	0x00000000,
    966 	(0x0e00 << 16) | (0xac8c >> 2),
    967 	0x00000000,
    968 	(0x0e00 << 16) | (0x970c >> 2),
    969 	0x00000000,
    970 	(0x0e00 << 16) | (0x9714 >> 2),
    971 	0x00000000,
    972 	(0x0e00 << 16) | (0x9718 >> 2),
    973 	0x00000000,
    974 	(0x0e00 << 16) | (0x971c >> 2),
    975 	0x00000000,
    976 	(0x0e00 << 16) | (0x31068 >> 2),
    977 	0x00000000,
    978 	(0x4e00 << 16) | (0x31068 >> 2),
    979 	0x00000000,
    980 	(0x5e00 << 16) | (0x31068 >> 2),
    981 	0x00000000,
    982 	(0x6e00 << 16) | (0x31068 >> 2),
    983 	0x00000000,
    984 	(0x7e00 << 16) | (0x31068 >> 2),
    985 	0x00000000,
    986 	(0x0e00 << 16) | (0xcd10 >> 2),
    987 	0x00000000,
    988 	(0x0e00 << 16) | (0xcd14 >> 2),
    989 	0x00000000,
    990 	(0x0e00 << 16) | (0x88b0 >> 2),
    991 	0x00000000,
    992 	(0x0e00 << 16) | (0x88b4 >> 2),
    993 	0x00000000,
    994 	(0x0e00 << 16) | (0x88b8 >> 2),
    995 	0x00000000,
    996 	(0x0e00 << 16) | (0x88bc >> 2),
    997 	0x00000000,
    998 	(0x0400 << 16) | (0x89c0 >> 2),
    999 	0x00000000,
   1000 	(0x0e00 << 16) | (0x88c4 >> 2),
   1001 	0x00000000,
   1002 	(0x0e00 << 16) | (0x88c8 >> 2),
   1003 	0x00000000,
   1004 	(0x0e00 << 16) | (0x88d0 >> 2),
   1005 	0x00000000,
   1006 	(0x0e00 << 16) | (0x88d4 >> 2),
   1007 	0x00000000,
   1008 	(0x0e00 << 16) | (0x88d8 >> 2),
   1009 	0x00000000,
   1010 	(0x0e00 << 16) | (0x8980 >> 2),
   1011 	0x00000000,
   1012 	(0x0e00 << 16) | (0x30938 >> 2),
   1013 	0x00000000,
   1014 	(0x0e00 << 16) | (0x3093c >> 2),
   1015 	0x00000000,
   1016 	(0x0e00 << 16) | (0x30940 >> 2),
   1017 	0x00000000,
   1018 	(0x0e00 << 16) | (0x89a0 >> 2),
   1019 	0x00000000,
   1020 	(0x0e00 << 16) | (0x30900 >> 2),
   1021 	0x00000000,
   1022 	(0x0e00 << 16) | (0x30904 >> 2),
   1023 	0x00000000,
   1024 	(0x0e00 << 16) | (0x89b4 >> 2),
   1025 	0x00000000,
   1026 	(0x0e00 << 16) | (0x3e1fc >> 2),
   1027 	0x00000000,
   1028 	(0x0e00 << 16) | (0x3c210 >> 2),
   1029 	0x00000000,
   1030 	(0x0e00 << 16) | (0x3c214 >> 2),
   1031 	0x00000000,
   1032 	(0x0e00 << 16) | (0x3c218 >> 2),
   1033 	0x00000000,
   1034 	(0x0e00 << 16) | (0x8904 >> 2),
   1035 	0x00000000,
   1036 	0x5,
   1037 	(0x0e00 << 16) | (0x8c28 >> 2),
   1038 	(0x0e00 << 16) | (0x8c2c >> 2),
   1039 	(0x0e00 << 16) | (0x8c30 >> 2),
   1040 	(0x0e00 << 16) | (0x8c34 >> 2),
   1041 	(0x0e00 << 16) | (0x9600 >> 2),
   1042 };
   1043 
/*
 * Bonaire SPM "golden" register fixups: (offset, and-mask, value) triples
 * fed to radeon_program_register_sequence() for CHIP_BONAIRE by
 * cik_init_golden_registers().  Hardware-specific values; do not edit.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1048 
/*
 * Bonaire common "golden" register fixups: (offset, and-mask, value)
 * triples fed to radeon_program_register_sequence() for CHIP_BONAIRE by
 * cik_init_golden_registers().  Hardware-specific values; do not edit.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1056 
/*
 * Bonaire chip-specific "golden" register fixups: (offset, and-mask,
 * value) triples fed to radeon_program_register_sequence() for
 * CHIP_BONAIRE by cik_init_golden_registers().  Hardware-specific
 * values from the vendor; do not reformat or "correct" entries.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
   1101 
/*
 * Bonaire clock-gating init sequence ("mgcg_cgcg" presumably stands for
 * medium-/coarse-grain clock gating — confirm against AMD docs).
 * (offset, and-mask, value) triples fed to
 * radeon_program_register_sequence() first for CHIP_BONAIRE by
 * cik_init_golden_registers().  Hardware-specific values; do not edit.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1187 
/*
 * Spectre (Kaveri) SPM "golden" register fixups: (offset, and-mask,
 * value) triples fed to radeon_program_register_sequence() for
 * CHIP_KAVERI by cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1192 
/*
 * Spectre (Kaveri) common "golden" register fixups: (offset, and-mask,
 * value) triples fed to radeon_program_register_sequence() for
 * CHIP_KAVERI by cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1200 
/*
 * Spectre (Kaveri) chip-specific "golden" register fixups: (offset,
 * and-mask, value) triples fed to radeon_program_register_sequence()
 * for CHIP_KAVERI by cik_init_golden_registers().  Hardware-specific
 * values from the vendor; do not reformat or "correct" entries.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
   1229 
/*
 * Spectre (Kaveri) clock-gating init sequence ("mgcg_cgcg" presumably
 * medium-/coarse-grain clock gating — confirm against AMD docs).
 * (offset, and-mask, value) triples fed to
 * radeon_program_register_sequence() first for CHIP_KAVERI by
 * cik_init_golden_registers().  Hardware-specific values; do not edit.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1320 
/*
 * Kalindi SPM "golden" register fixups: (offset, and-mask, value)
 * triples fed to radeon_program_register_sequence().  Shared by
 * CHIP_KABINI and CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1325 
/*
 * Kalindi common "golden" register fixups: (offset, and-mask, value)
 * triples fed to radeon_program_register_sequence().  Shared by
 * CHIP_KABINI and CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1333 
/*
 * Kalindi (Kabini) chip-specific "golden" register fixups: (offset,
 * and-mask, value) triples fed to radeon_program_register_sequence()
 * for CHIP_KABINI by cik_init_golden_registers().  Hardware-specific
 * values from the vendor; do not reformat or "correct" entries.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
   1367 
/*
 * Kalindi clock-gating init sequence ("mgcg_cgcg" presumably
 * medium-/coarse-grain clock gating — confirm against AMD docs).
 * (offset, and-mask, value) triples fed to
 * radeon_program_register_sequence() first for both CHIP_KABINI and
 * CHIP_MULLINS by cik_init_golden_registers().  Do not edit values.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1426 
/*
 * Hawaii SPM "golden" register fixups: (offset, and-mask, value)
 * triples fed to radeon_program_register_sequence() for CHIP_HAWAII
 * by cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1431 
/*
 * Hawaii common "golden" register fixups: (offset, and-mask, value)
 * triples fed to radeon_program_register_sequence() for CHIP_HAWAII
 * by cik_init_golden_registers().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
   1440 
/*
 * Hawaii chip-specific "golden" register fixups: (offset, and-mask,
 * value) triples fed to radeon_program_register_sequence() for
 * CHIP_HAWAII by cik_init_golden_registers().  Hardware-specific
 * values from the vendor; do not reformat or "correct" entries.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
   1480 
/*
 * Hawaii clock-gating init sequence ("mgcg_cgcg" presumably
 * medium-/coarse-grain clock gating — confirm against AMD docs).
 * (offset, and-mask, value) triples fed to
 * radeon_program_register_sequence() first for CHIP_HAWAII by
 * cik_init_golden_registers().  Hardware-specific values; do not edit.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1591 
/*
 * Godavari (Mullins) chip-specific "golden" register fixups: (offset,
 * and-mask, value) triples fed to radeon_program_register_sequence()
 * for CHIP_MULLINS by cik_init_golden_registers().  Hardware-specific
 * values from the vendor; do not reformat or "correct" entries.
 *
 * NOTE(review): the 0x98302 offset below looks like a typo for 0x9834
 * (compare kalindi_golden_registers, which pairs 0x9830 with 0x9834
 * using the same mask/value) — confirm against the upstream Linux
 * radeon driver before changing anything.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
   1627 
   1628 
   1629 static void cik_init_golden_registers(struct radeon_device *rdev)
   1630 {
   1631 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
   1632 	mutex_lock(&rdev->grbm_idx_mutex);
   1633 	switch (rdev->family) {
   1634 	case CHIP_BONAIRE:
   1635 		radeon_program_register_sequence(rdev,
   1636 						 bonaire_mgcg_cgcg_init,
   1637 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
   1638 		radeon_program_register_sequence(rdev,
   1639 						 bonaire_golden_registers,
   1640 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
   1641 		radeon_program_register_sequence(rdev,
   1642 						 bonaire_golden_common_registers,
   1643 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
   1644 		radeon_program_register_sequence(rdev,
   1645 						 bonaire_golden_spm_registers,
   1646 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
   1647 		break;
   1648 	case CHIP_KABINI:
   1649 		radeon_program_register_sequence(rdev,
   1650 						 kalindi_mgcg_cgcg_init,
   1651 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
   1652 		radeon_program_register_sequence(rdev,
   1653 						 kalindi_golden_registers,
   1654 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
   1655 		radeon_program_register_sequence(rdev,
   1656 						 kalindi_golden_common_registers,
   1657 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
   1658 		radeon_program_register_sequence(rdev,
   1659 						 kalindi_golden_spm_registers,
   1660 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
   1661 		break;
   1662 	case CHIP_MULLINS:
   1663 		radeon_program_register_sequence(rdev,
   1664 						 kalindi_mgcg_cgcg_init,
   1665 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
   1666 		radeon_program_register_sequence(rdev,
   1667 						 godavari_golden_registers,
   1668 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
   1669 		radeon_program_register_sequence(rdev,
   1670 						 kalindi_golden_common_registers,
   1671 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
   1672 		radeon_program_register_sequence(rdev,
   1673 						 kalindi_golden_spm_registers,
   1674 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
   1675 		break;
   1676 	case CHIP_KAVERI:
   1677 		radeon_program_register_sequence(rdev,
   1678 						 spectre_mgcg_cgcg_init,
   1679 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
   1680 		radeon_program_register_sequence(rdev,
   1681 						 spectre_golden_registers,
   1682 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
   1683 		radeon_program_register_sequence(rdev,
   1684 						 spectre_golden_common_registers,
   1685 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
   1686 		radeon_program_register_sequence(rdev,
   1687 						 spectre_golden_spm_registers,
   1688 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
   1689 		break;
   1690 	case CHIP_HAWAII:
   1691 		radeon_program_register_sequence(rdev,
   1692 						 hawaii_mgcg_cgcg_init,
   1693 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
   1694 		radeon_program_register_sequence(rdev,
   1695 						 hawaii_golden_registers,
   1696 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
   1697 		radeon_program_register_sequence(rdev,
   1698 						 hawaii_golden_common_registers,
   1699 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
   1700 		radeon_program_register_sequence(rdev,
   1701 						 hawaii_golden_spm_registers,
   1702 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
   1703 		break;
   1704 	default:
   1705 		break;
   1706 	}
   1707 	mutex_unlock(&rdev->grbm_idx_mutex);
   1708 }
   1709 
   1710 /**
   1711  * cik_get_xclk - get the xclk
   1712  *
   1713  * @rdev: radeon_device pointer
   1714  *
   1715  * Returns the reference clock used by the gfx engine
   1716  * (CIK).
   1717  */
   1718 u32 cik_get_xclk(struct radeon_device *rdev)
   1719 {
   1720         u32 reference_clock = rdev->clock.spll.reference_freq;
   1721 
   1722 	if (rdev->flags & RADEON_IS_IGP) {
   1723 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
   1724 			return reference_clock / 2;
   1725 	} else {
   1726 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
   1727 			return reference_clock / 4;
   1728 	}
   1729 	return reference_clock;
   1730 }
   1731 
   1732 /**
   1733  * cik_mm_rdoorbell - read a doorbell dword
   1734  *
   1735  * @rdev: radeon_device pointer
   1736  * @index: doorbell index
   1737  *
   1738  * Returns the value in the doorbell aperture at the
   1739  * requested doorbell index (CIK).
   1740  */
   1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
   1742 {
   1743 	if (index < rdev->doorbell.num_doorbells) {
   1744 #ifdef __NetBSD__
   1745 		return bus_space_read_4(rdev->doorbell.bst, rdev->doorbell.bsh,
   1746 		    index*4);
   1747 #else
   1748 		return readl(rdev->doorbell.ptr + index);
   1749 #endif
   1750 	} else {
   1751 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
   1752 		return 0;
   1753 	}
   1754 }
   1755 
   1756 /**
   1757  * cik_mm_wdoorbell - write a doorbell dword
   1758  *
   1759  * @rdev: radeon_device pointer
   1760  * @index: doorbell index
   1761  * @v: value to write
   1762  *
   1763  * Writes @v to the doorbell aperture at the
   1764  * requested doorbell index (CIK).
   1765  */
   1766 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
   1767 {
   1768 	if (index < rdev->doorbell.num_doorbells) {
   1769 #ifdef __NetBSD__
   1770 		bus_space_write_4(rdev->doorbell.bst, rdev->doorbell.bsh,
   1771 		    index*4, v);
   1772 #else
   1773 		writel(v, rdev->doorbell.ptr + index);
   1774 #endif
   1775 	} else {
   1776 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
   1777 	}
   1778 }
   1779 
/* Number of {address, data} rows in bonaire_io_mc_regs below. */
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC I/O register table: {address, data} pairs.  The consumer
 * is outside this chunk — presumably written to the memory controller
 * as part of MC firmware load; confirm against the caller.  Values are
 * hardware-specific; do not edit.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
   1821 
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC I/O register programming table for Hawaii.  Same format as
 * bonaire_io_mc_regs: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs consumed by ci_mc_load_microcode() on the legacy-ucode path.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
   1849 
   1850 
   1851 /**
   1852  * cik_srbm_select - select specific register instances
   1853  *
   1854  * @rdev: radeon_device pointer
   1855  * @me: selected ME (micro engine)
   1856  * @pipe: pipe
   1857  * @queue: queue
   1858  * @vmid: VMID
   1859  *
   1860  * Switches the currently active registers instances.  Some
   1861  * registers are instanced per VMID, others are instanced per
   1862  * me/pipe/queue combination.
   1863  */
   1864 static void cik_srbm_select(struct radeon_device *rdev,
   1865 			    u32 me, u32 pipe, u32 queue, u32 vmid)
   1866 {
   1867 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
   1868 			     MEID(me & 0x3) |
   1869 			     VMID(vmid & 0xf) |
   1870 			     QUEUEID(queue & 0x7));
   1871 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
   1872 }
   1873 
   1874 /* ucode loading */
   1875 /**
   1876  * ci_mc_load_microcode - load MC ucode into the hw
   1877  *
   1878  * @rdev: radeon_device pointer
   1879  *
   1880  * Load the GDDR MC ucode into the hw (CIK).
   1881  * Returns 0 on success, error on failure.
   1882  */
   1883 int ci_mc_load_microcode(struct radeon_device *rdev)
   1884 {
   1885 	const __be32 *fw_data = NULL;
   1886 	const __le32 *new_fw_data = NULL;
   1887 	u32 running, blackout = 0, tmp;
   1888 	const u32 *io_mc_regs = NULL;
   1889 	const __le32 *new_io_mc_regs = NULL;
   1890 	int i, regs_size, ucode_size;
   1891 
   1892 	if (!rdev->mc_fw)
   1893 		return -EINVAL;
   1894 
   1895 	if (rdev->new_fw) {
   1896 		const struct mc_firmware_header_v1_0 *hdr =
   1897 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
   1898 
   1899 		radeon_ucode_print_mc_hdr(&hdr->header);
   1900 
   1901 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
   1902 		new_io_mc_regs = (const __le32 *)
   1903 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
   1904 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   1905 		new_fw_data = (const __le32 *)
   1906 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   1907 	} else {
   1908 		ucode_size = rdev->mc_fw->size / 4;
   1909 
   1910 		switch (rdev->family) {
   1911 		case CHIP_BONAIRE:
   1912 			io_mc_regs = &bonaire_io_mc_regs[0][0];
   1913 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
   1914 			break;
   1915 		case CHIP_HAWAII:
   1916 			io_mc_regs = &hawaii_io_mc_regs[0][0];
   1917 			regs_size = HAWAII_IO_MC_REGS_SIZE;
   1918 			break;
   1919 		default:
   1920 			return -EINVAL;
   1921 		}
   1922 		fw_data = (const __be32 *)rdev->mc_fw->data;
   1923 	}
   1924 
   1925 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
   1926 
   1927 	if (running == 0) {
   1928 		if (running) {
   1929 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
   1930 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
   1931 		}
   1932 
   1933 		/* reset the engine and set to writable */
   1934 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
   1935 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
   1936 
   1937 		/* load mc io regs */
   1938 		for (i = 0; i < regs_size; i++) {
   1939 			if (rdev->new_fw) {
   1940 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
   1941 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
   1942 			} else {
   1943 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
   1944 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
   1945 			}
   1946 		}
   1947 
   1948 		tmp = RREG32(MC_SEQ_MISC0);
   1949 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
   1950 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
   1951 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
   1952 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
   1953 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
   1954 		}
   1955 
   1956 		/* load the MC ucode */
   1957 		for (i = 0; i < ucode_size; i++) {
   1958 			if (rdev->new_fw)
   1959 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
   1960 			else
   1961 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
   1962 		}
   1963 
   1964 		/* put the engine back into the active state */
   1965 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
   1966 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
   1967 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
   1968 
   1969 		/* wait for training to complete */
   1970 		for (i = 0; i < rdev->usec_timeout; i++) {
   1971 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
   1972 				break;
   1973 			udelay(1);
   1974 		}
   1975 		for (i = 0; i < rdev->usec_timeout; i++) {
   1976 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
   1977 				break;
   1978 			udelay(1);
   1979 		}
   1980 
   1981 		if (running)
   1982 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
   1983 	}
   1984 
   1985 	return 0;
   1986 }
   1987 
   1988 /**
   1989  * cik_init_microcode - load ucode images from disk
   1990  *
   1991  * @rdev: radeon_device pointer
   1992  *
   1993  * Use the firmware interface to load the ucode images into
   1994  * the driver (not loaded into hw).
   1995  * Returns 0 on success, error on failure.
   1996  */
   1997 static int cik_init_microcode(struct radeon_device *rdev)
   1998 {
   1999 	const char *chip_name;
   2000 	const char *new_chip_name;
   2001 	size_t pfp_req_size, me_req_size, ce_req_size,
   2002 		mec_req_size, rlc_req_size, mc_req_size = 0,
   2003 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
   2004 	char fw_name[30];
   2005 	int new_fw = 0;
   2006 	int err;
   2007 	int num_fw;
   2008 
   2009 	DRM_DEBUG("\n");
   2010 
   2011 	switch (rdev->family) {
   2012 	case CHIP_BONAIRE:
   2013 		chip_name = "BONAIRE";
   2014 		new_chip_name = "bonaire";
   2015 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
   2016 		me_req_size = CIK_ME_UCODE_SIZE * 4;
   2017 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
   2018 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
   2019 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
   2020 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
   2021 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
   2022 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
   2023 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
   2024 		smc_req_size = round_up(BONAIRE_SMC_UCODE_SIZE, 4);
   2025 #else
   2026 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
   2027 #endif
   2028 		num_fw = 8;
   2029 		break;
   2030 	case CHIP_HAWAII:
   2031 		chip_name = "HAWAII";
   2032 		new_chip_name = "hawaii";
   2033 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
   2034 		me_req_size = CIK_ME_UCODE_SIZE * 4;
   2035 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
   2036 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
   2037 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
   2038 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
   2039 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
   2040 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
   2041 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
   2042 		smc_req_size = round_up(HAWAII_SMC_UCODE_SIZE, 4);
   2043 #else
   2044 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
   2045 #endif
   2046 		num_fw = 8;
   2047 		break;
   2048 	case CHIP_KAVERI:
   2049 		chip_name = "KAVERI";
   2050 		new_chip_name = "kaveri";
   2051 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
   2052 		me_req_size = CIK_ME_UCODE_SIZE * 4;
   2053 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
   2054 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
   2055 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
   2056 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
   2057 		num_fw = 7;
   2058 		break;
   2059 	case CHIP_KABINI:
   2060 		chip_name = "KABINI";
   2061 		new_chip_name = "kabini";
   2062 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
   2063 		me_req_size = CIK_ME_UCODE_SIZE * 4;
   2064 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
   2065 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
   2066 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
   2067 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
   2068 		num_fw = 6;
   2069 		break;
   2070 	case CHIP_MULLINS:
   2071 		chip_name = "MULLINS";
   2072 		new_chip_name = "mullins";
   2073 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
   2074 		me_req_size = CIK_ME_UCODE_SIZE * 4;
   2075 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
   2076 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
   2077 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
   2078 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
   2079 		num_fw = 6;
   2080 		break;
   2081 	default: BUG();
   2082 	}
   2083 
   2084 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
   2085 
   2086 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
   2087 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
   2088 	if (err) {
   2089 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
   2090 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
   2091 		if (err)
   2092 			goto out;
   2093 		if (rdev->pfp_fw->size != pfp_req_size) {
   2094 			printk(KERN_ERR
   2095 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
   2096 			       rdev->pfp_fw->size, fw_name);
   2097 			err = -EINVAL;
   2098 			goto out;
   2099 		}
   2100 	} else {
   2101 		err = radeon_ucode_validate(rdev->pfp_fw);
   2102 		if (err) {
   2103 			printk(KERN_ERR
   2104 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2105 			       fw_name);
   2106 			goto out;
   2107 		} else {
   2108 			new_fw++;
   2109 		}
   2110 	}
   2111 
   2112 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
   2113 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
   2114 	if (err) {
   2115 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
   2116 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
   2117 		if (err)
   2118 			goto out;
   2119 		if (rdev->me_fw->size != me_req_size) {
   2120 			printk(KERN_ERR
   2121 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
   2122 			       rdev->me_fw->size, fw_name);
   2123 			err = -EINVAL;
   2124 		}
   2125 	} else {
   2126 		err = radeon_ucode_validate(rdev->me_fw);
   2127 		if (err) {
   2128 			printk(KERN_ERR
   2129 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2130 			       fw_name);
   2131 			goto out;
   2132 		} else {
   2133 			new_fw++;
   2134 		}
   2135 	}
   2136 
   2137 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
   2138 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
   2139 	if (err) {
   2140 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
   2141 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
   2142 		if (err)
   2143 			goto out;
   2144 		if (rdev->ce_fw->size != ce_req_size) {
   2145 			printk(KERN_ERR
   2146 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
   2147 			       rdev->ce_fw->size, fw_name);
   2148 			err = -EINVAL;
   2149 		}
   2150 	} else {
   2151 		err = radeon_ucode_validate(rdev->ce_fw);
   2152 		if (err) {
   2153 			printk(KERN_ERR
   2154 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2155 			       fw_name);
   2156 			goto out;
   2157 		} else {
   2158 			new_fw++;
   2159 		}
   2160 	}
   2161 
   2162 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
   2163 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
   2164 	if (err) {
   2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
   2166 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
   2167 		if (err)
   2168 			goto out;
   2169 		if (rdev->mec_fw->size != mec_req_size) {
   2170 			printk(KERN_ERR
   2171 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
   2172 			       rdev->mec_fw->size, fw_name);
   2173 			err = -EINVAL;
   2174 		}
   2175 	} else {
   2176 		err = radeon_ucode_validate(rdev->mec_fw);
   2177 		if (err) {
   2178 			printk(KERN_ERR
   2179 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2180 			       fw_name);
   2181 			goto out;
   2182 		} else {
   2183 			new_fw++;
   2184 		}
   2185 	}
   2186 
   2187 	if (rdev->family == CHIP_KAVERI) {
   2188 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
   2189 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
   2190 		if (err) {
   2191 			goto out;
   2192 		} else {
   2193 			err = radeon_ucode_validate(rdev->mec2_fw);
   2194 			if (err) {
   2195 				goto out;
   2196 			} else {
   2197 				new_fw++;
   2198 			}
   2199 		}
   2200 	}
   2201 
   2202 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
   2203 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
   2204 	if (err) {
   2205 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
   2206 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
   2207 		if (err)
   2208 			goto out;
   2209 		if (rdev->rlc_fw->size != rlc_req_size) {
   2210 			printk(KERN_ERR
   2211 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
   2212 			       rdev->rlc_fw->size, fw_name);
   2213 			err = -EINVAL;
   2214 		}
   2215 	} else {
   2216 		err = radeon_ucode_validate(rdev->rlc_fw);
   2217 		if (err) {
   2218 			printk(KERN_ERR
   2219 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2220 			       fw_name);
   2221 			goto out;
   2222 		} else {
   2223 			new_fw++;
   2224 		}
   2225 	}
   2226 
   2227 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
   2228 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
   2229 	if (err) {
   2230 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
   2231 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
   2232 		if (err)
   2233 			goto out;
   2234 		if (rdev->sdma_fw->size != sdma_req_size) {
   2235 			printk(KERN_ERR
   2236 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
   2237 			       rdev->sdma_fw->size, fw_name);
   2238 			err = -EINVAL;
   2239 		}
   2240 	} else {
   2241 		err = radeon_ucode_validate(rdev->sdma_fw);
   2242 		if (err) {
   2243 			printk(KERN_ERR
   2244 			       "cik_fw: validation failed for firmware \"%s\"\n",
   2245 			       fw_name);
   2246 			goto out;
   2247 		} else {
   2248 			new_fw++;
   2249 		}
   2250 	}
   2251 
   2252 	/* No SMC, MC ucode on APUs */
   2253 	if (!(rdev->flags & RADEON_IS_IGP)) {
   2254 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
   2255 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
   2256 		if (err) {
   2257 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
   2258 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
   2259 			if (err) {
   2260 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
   2261 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
   2262 				if (err)
   2263 					goto out;
   2264 			}
   2265 			if ((rdev->mc_fw->size != mc_req_size) &&
   2266 			    (rdev->mc_fw->size != mc2_req_size)){
   2267 				printk(KERN_ERR
   2268 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
   2269 				       rdev->mc_fw->size, fw_name);
   2270 				err = -EINVAL;
   2271 			}
   2272 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
   2273 		} else {
   2274 			err = radeon_ucode_validate(rdev->mc_fw);
   2275 			if (err) {
   2276 				printk(KERN_ERR
   2277 				       "cik_fw: validation failed for firmware \"%s\"\n",
   2278 				       fw_name);
   2279 				goto out;
   2280 			} else {
   2281 				new_fw++;
   2282 			}
   2283 		}
   2284 
   2285 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
   2286 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
   2287 		if (err) {
   2288 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
   2289 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
   2290 			if (err) {
   2291 				printk(KERN_ERR
   2292 				       "smc: error loading firmware \"%s\"\n",
   2293 				       fw_name);
   2294 				release_firmware(rdev->smc_fw);
   2295 				rdev->smc_fw = NULL;
   2296 				err = 0;
   2297 			} else if (rdev->smc_fw->size != smc_req_size) {
   2298 				printk(KERN_ERR
   2299 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
   2300 				       rdev->smc_fw->size, fw_name);
   2301 				err = -EINVAL;
   2302 			}
   2303 		} else {
   2304 			err = radeon_ucode_validate(rdev->smc_fw);
   2305 			if (err) {
   2306 				printk(KERN_ERR
   2307 				       "cik_fw: validation failed for firmware \"%s\"\n",
   2308 				       fw_name);
   2309 				goto out;
   2310 			} else {
   2311 				new_fw++;
   2312 			}
   2313 		}
   2314 	}
   2315 
   2316 	if (new_fw == 0) {
   2317 		rdev->new_fw = false;
   2318 	} else if (new_fw < num_fw) {
   2319 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
   2320 		err = -EINVAL;
   2321 	} else {
   2322 		rdev->new_fw = true;
   2323 	}
   2324 
   2325 out:
   2326 	if (err) {
   2327 		if (err != -EINVAL)
   2328 			printk(KERN_ERR
   2329 			       "cik_cp: Failed to load firmware \"%s\"\n",
   2330 			       fw_name);
   2331 		release_firmware(rdev->pfp_fw);
   2332 		rdev->pfp_fw = NULL;
   2333 		release_firmware(rdev->me_fw);
   2334 		rdev->me_fw = NULL;
   2335 		release_firmware(rdev->ce_fw);
   2336 		rdev->ce_fw = NULL;
   2337 		release_firmware(rdev->mec_fw);
   2338 		rdev->mec_fw = NULL;
   2339 		release_firmware(rdev->mec2_fw);
   2340 		rdev->mec2_fw = NULL;
   2341 		release_firmware(rdev->rlc_fw);
   2342 		rdev->rlc_fw = NULL;
   2343 		release_firmware(rdev->sdma_fw);
   2344 		rdev->sdma_fw = NULL;
   2345 		release_firmware(rdev->mc_fw);
   2346 		rdev->mc_fw = NULL;
   2347 		release_firmware(rdev->smc_fw);
   2348 		rdev->smc_fw = NULL;
   2349 	}
   2350 	return err;
   2351 }
   2352 
   2353 /*
   2354  * Core functions
   2355  */
   2356 /**
   2357  * cik_tiling_mode_table_init - init the hw tiling table
   2358  *
   2359  * @rdev: radeon_device pointer
   2360  *
   2361  * Starting with SI, the tiling setup is done globally in a
   2362  * set of 32 tiling modes.  Rather than selecting each set of
   2363  * parameters per surface as on older asics, we just select
   2364  * which index in the tiling table we want to use, and the
   2365  * surface uses those parameters (CIK).
   2366  */
   2367 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
   2368 {
   2369 	const u32 num_tile_mode_states = 32;
   2370 	const u32 num_secondary_tile_mode_states = 16;
   2371 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
   2372 	u32 num_pipe_configs;
   2373 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
   2374 		rdev->config.cik.max_shader_engines;
   2375 
   2376 	switch (rdev->config.cik.mem_row_size_in_kb) {
   2377 	case 1:
   2378 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
   2379 		break;
   2380 	case 2:
   2381 	default:
   2382 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
   2383 		break;
   2384 	case 4:
   2385 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
   2386 		break;
   2387 	}
   2388 
   2389 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
   2390 	if (num_pipe_configs > 8)
   2391 		num_pipe_configs = 16;
   2392 
   2393 	if (num_pipe_configs == 16) {
   2394 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
   2395 			switch (reg_offset) {
   2396 			case 0:
   2397 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2398 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2399 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2400 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2401 				break;
   2402 			case 1:
   2403 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2404 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2406 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2407 				break;
   2408 			case 2:
   2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2413 				break;
   2414 			case 3:
   2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2418 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2419 				break;
   2420 			case 4:
   2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2422 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2423 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2424 						 TILE_SPLIT(split_equal_to_row_size));
   2425 				break;
   2426 			case 5:
   2427 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2428 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2430 				break;
   2431 			case 6:
   2432 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2433 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2434 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2435 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2436 				break;
   2437 			case 7:
   2438 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2439 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2440 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2441 						 TILE_SPLIT(split_equal_to_row_size));
   2442 				break;
   2443 			case 8:
   2444 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2445 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
   2446 				break;
   2447 			case 9:
   2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2451 				break;
   2452 			case 10:
   2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2457 				break;
   2458 			case 11:
   2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2463 				break;
   2464 			case 12:
   2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2469 				break;
   2470 			case 13:
   2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2474 				break;
   2475 			case 14:
   2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2480 				break;
   2481 			case 16:
   2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2486 				break;
   2487 			case 17:
   2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2492 				break;
   2493 			case 27:
   2494 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2495 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2496 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2497 				break;
   2498 			case 28:
   2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2500 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2501 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2502 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2503 				break;
   2504 			case 29:
   2505 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2506 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2507 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2508 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2509 				break;
   2510 			case 30:
   2511 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2512 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2513 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2514 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2515 				break;
   2516 			default:
   2517 				gb_tile_moden = 0;
   2518 				break;
   2519 			}
   2520 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
   2521 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   2522 		}
   2523 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
   2524 			switch (reg_offset) {
   2525 			case 0:
   2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2529 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2530 				break;
   2531 			case 1:
   2532 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2533 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2534 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2535 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2536 				break;
   2537 			case 2:
   2538 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2539 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2540 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2541 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2542 				break;
   2543 			case 3:
   2544 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2545 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2546 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2547 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2548 				break;
   2549 			case 4:
   2550 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2553 						 NUM_BANKS(ADDR_SURF_8_BANK));
   2554 				break;
   2555 			case 5:
   2556 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2559 						 NUM_BANKS(ADDR_SURF_4_BANK));
   2560 				break;
   2561 			case 6:
   2562 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2563 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2564 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2565 						 NUM_BANKS(ADDR_SURF_2_BANK));
   2566 				break;
   2567 			case 8:
   2568 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2569 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2570 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2571 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2572 				break;
   2573 			case 9:
   2574 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2575 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2576 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2577 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2578 				break;
   2579 			case 10:
   2580 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2583 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2584 				break;
   2585 			case 11:
   2586 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2589 						 NUM_BANKS(ADDR_SURF_8_BANK));
   2590 				break;
   2591 			case 12:
   2592 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2593 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2594 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2595 						 NUM_BANKS(ADDR_SURF_4_BANK));
   2596 				break;
   2597 			case 13:
   2598 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2599 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2600 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2601 						 NUM_BANKS(ADDR_SURF_2_BANK));
   2602 				break;
   2603 			case 14:
   2604 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2605 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2606 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2607 						 NUM_BANKS(ADDR_SURF_2_BANK));
   2608 				break;
   2609 			default:
   2610 				gb_tile_moden = 0;
   2611 				break;
   2612 			}
   2613 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
   2614 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   2615 		}
   2616 	} else if (num_pipe_configs == 8) {
   2617 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
   2618 			switch (reg_offset) {
   2619 			case 0:
   2620 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2621 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2622 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2623 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2624 				break;
   2625 			case 1:
   2626 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2627 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2629 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2630 				break;
   2631 			case 2:
   2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2636 				break;
   2637 			case 3:
   2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2641 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2642 				break;
   2643 			case 4:
   2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2645 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2646 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2647 						 TILE_SPLIT(split_equal_to_row_size));
   2648 				break;
   2649 			case 5:
   2650 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2651 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2653 				break;
   2654 			case 6:
   2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2656 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2657 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2658 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2659 				break;
   2660 			case 7:
   2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2662 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2663 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2664 						 TILE_SPLIT(split_equal_to_row_size));
   2665 				break;
   2666 			case 8:
   2667 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2668 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
   2669 				break;
   2670 			case 9:
   2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2674 				break;
   2675 			case 10:
   2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2680 				break;
   2681 			case 11:
   2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2686 				break;
   2687 			case 12:
   2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2692 				break;
   2693 			case 13:
   2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2697 				break;
   2698 			case 14:
   2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2703 				break;
   2704 			case 16:
   2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2709 				break;
   2710 			case 17:
   2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2715 				break;
   2716 			case 27:
   2717 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2718 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2719 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2720 				break;
   2721 			case 28:
   2722 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2723 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2724 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2725 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2726 				break;
   2727 			case 29:
   2728 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2729 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2730 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2731 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2732 				break;
   2733 			case 30:
   2734 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2735 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2736 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2737 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2738 				break;
   2739 			default:
   2740 				gb_tile_moden = 0;
   2741 				break;
   2742 			}
   2743 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
   2744 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   2745 		}
   2746 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
   2747 			switch (reg_offset) {
   2748 			case 0:
   2749 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2750 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2751 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2752 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2753 				break;
   2754 			case 1:
   2755 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2756 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2757 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2758 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2759 				break;
   2760 			case 2:
   2761 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2762 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2763 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2764 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2765 				break;
   2766 			case 3:
   2767 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2768 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2769 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2770 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2771 				break;
   2772 			case 4:
   2773 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2774 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2775 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2776 						 NUM_BANKS(ADDR_SURF_8_BANK));
   2777 				break;
   2778 			case 5:
   2779 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2780 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2781 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2782 						 NUM_BANKS(ADDR_SURF_4_BANK));
   2783 				break;
   2784 			case 6:
   2785 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2786 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2787 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2788 						 NUM_BANKS(ADDR_SURF_2_BANK));
   2789 				break;
   2790 			case 8:
   2791 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2792 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2793 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2794 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2795 				break;
   2796 			case 9:
   2797 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2798 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2799 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2800 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2801 				break;
   2802 			case 10:
   2803 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2804 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2805 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2806 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2807 				break;
   2808 			case 11:
   2809 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2810 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2811 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2812 						 NUM_BANKS(ADDR_SURF_16_BANK));
   2813 				break;
   2814 			case 12:
   2815 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2816 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2817 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2818 						 NUM_BANKS(ADDR_SURF_8_BANK));
   2819 				break;
   2820 			case 13:
   2821 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2822 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2823 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2824 						 NUM_BANKS(ADDR_SURF_4_BANK));
   2825 				break;
   2826 			case 14:
   2827 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2828 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2829 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2830 						 NUM_BANKS(ADDR_SURF_2_BANK));
   2831 				break;
   2832 			default:
   2833 				gb_tile_moden = 0;
   2834 				break;
   2835 			}
   2836 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
   2837 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   2838 		}
   2839 	} else if (num_pipe_configs == 4) {
   2840 		if (num_rbs == 4) {
   2841 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
   2842 				switch (reg_offset) {
   2843 				case 0:
   2844 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2845 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2846 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2847 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2848 					break;
   2849 				case 1:
   2850 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2851 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2853 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2854 					break;
   2855 				case 2:
   2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2860 					break;
   2861 				case 3:
   2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2865 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2866 					break;
   2867 				case 4:
   2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2869 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2870 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2871 							 TILE_SPLIT(split_equal_to_row_size));
   2872 					break;
   2873 				case 5:
   2874 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2875 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2877 					break;
   2878 				case 6:
   2879 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2880 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2881 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2882 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2883 					break;
   2884 				case 7:
   2885 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2886 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2887 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2888 							 TILE_SPLIT(split_equal_to_row_size));
   2889 					break;
   2890 				case 8:
   2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2892 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
   2893 					break;
   2894 				case 9:
   2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2898 					break;
   2899 				case 10:
   2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2904 					break;
   2905 				case 11:
   2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2910 					break;
   2911 				case 12:
   2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2916 					break;
   2917 				case 13:
   2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2921 					break;
   2922 				case 14:
   2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2927 					break;
   2928 				case 16:
   2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2933 					break;
   2934 				case 17:
   2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2939 					break;
   2940 				case 27:
   2941 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2942 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2943 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2944 					break;
   2945 				case 28:
   2946 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2947 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2948 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2949 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2950 					break;
   2951 				case 29:
   2952 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2953 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2954 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2955 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2956 					break;
   2957 				case 30:
   2958 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2959 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2960 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2961 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2962 					break;
   2963 				default:
   2964 					gb_tile_moden = 0;
   2965 					break;
   2966 				}
   2967 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
   2968 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   2969 			}
   2970 		} else if (num_rbs < 4) {
   2971 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
   2972 				switch (reg_offset) {
   2973 				case 0:
   2974 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2975 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2976 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2977 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2978 					break;
   2979 				case 1:
   2980 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2981 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2983 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2984 					break;
   2985 				case 2:
   2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2990 					break;
   2991 				case 3:
   2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2995 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2996 					break;
   2997 				case 4:
   2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2999 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3000 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3001 							 TILE_SPLIT(split_equal_to_row_size));
   3002 					break;
   3003 				case 5:
   3004 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3005 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3007 					break;
   3008 				case 6:
   3009 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3010 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3011 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3012 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   3013 					break;
   3014 				case 7:
   3015 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3016 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3017 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3018 							 TILE_SPLIT(split_equal_to_row_size));
   3019 					break;
   3020 				case 8:
   3021 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   3022 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
   3023 					break;
   3024 				case 9:
   3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   3028 					break;
   3029 				case 10:
   3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3034 					break;
   3035 				case 11:
   3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3040 					break;
   3041 				case 12:
   3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3046 					break;
   3047 				case 13:
   3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   3051 					break;
   3052 				case 14:
   3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3057 					break;
   3058 				case 16:
   3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3063 					break;
   3064 				case 17:
   3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3069 					break;
   3070 				case 27:
   3071 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3072 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3073 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   3074 					break;
   3075 				case 28:
   3076 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3077 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3078 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3079 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3080 					break;
   3081 				case 29:
   3082 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3083 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3084 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3085 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3086 					break;
   3087 				case 30:
   3088 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3089 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3090 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   3091 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3092 					break;
   3093 				default:
   3094 					gb_tile_moden = 0;
   3095 					break;
   3096 				}
   3097 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
   3098 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   3099 			}
   3100 		}
   3101 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
   3102 			switch (reg_offset) {
   3103 			case 0:
   3104 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3105 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3106 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3107 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3108 				break;
   3109 			case 1:
   3110 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3111 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3112 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3113 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3114 				break;
   3115 			case 2:
   3116 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3117 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3118 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3119 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3120 				break;
   3121 			case 3:
   3122 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3123 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3124 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3125 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3126 				break;
   3127 			case 4:
   3128 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3129 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3130 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3131 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3132 				break;
   3133 			case 5:
   3134 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3135 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3136 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3137 						 NUM_BANKS(ADDR_SURF_8_BANK));
   3138 				break;
   3139 			case 6:
   3140 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3141 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3142 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3143 						 NUM_BANKS(ADDR_SURF_4_BANK));
   3144 				break;
   3145 			case 8:
   3146 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3147 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   3148 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3149 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3150 				break;
   3151 			case 9:
   3152 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3153 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3154 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3155 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3156 				break;
   3157 			case 10:
   3158 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3159 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3160 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3161 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3162 				break;
   3163 			case 11:
   3164 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3165 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3166 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3167 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3168 				break;
   3169 			case 12:
   3170 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3171 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3172 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3173 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3174 				break;
   3175 			case 13:
   3176 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3177 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3178 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3179 						 NUM_BANKS(ADDR_SURF_8_BANK));
   3180 				break;
   3181 			case 14:
   3182 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3183 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3184 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3185 						 NUM_BANKS(ADDR_SURF_4_BANK));
   3186 				break;
   3187 			default:
   3188 				gb_tile_moden = 0;
   3189 				break;
   3190 			}
   3191 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
   3192 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   3193 		}
   3194 	} else if (num_pipe_configs == 2) {
   3195 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
   3196 			switch (reg_offset) {
   3197 			case 0:
   3198 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3199 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3200 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3201 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   3202 				break;
   3203 			case 1:
   3204 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3205 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3207 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   3208 				break;
   3209 			case 2:
   3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   3214 				break;
   3215 			case 3:
   3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3219 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   3220 				break;
   3221 			case 4:
   3222 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3223 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3224 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3225 						 TILE_SPLIT(split_equal_to_row_size));
   3226 				break;
   3227 			case 5:
   3228 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3229 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3231 				break;
   3232 			case 6:
   3233 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3234 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3235 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3236 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   3237 				break;
   3238 			case 7:
   3239 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3240 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   3241 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3242 						 TILE_SPLIT(split_equal_to_row_size));
   3243 				break;
   3244 			case 8:
   3245 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   3246 						PIPE_CONFIG(ADDR_SURF_P2);
   3247 				break;
   3248 			case 9:
   3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3250 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3251 						 PIPE_CONFIG(ADDR_SURF_P2));
   3252 				break;
   3253 			case 10:
   3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3258 				break;
   3259 			case 11:
   3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3264 				break;
   3265 			case 12:
   3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3270 				break;
   3271 			case 13:
   3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3273 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3274 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   3275 				break;
   3276 			case 14:
   3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3281 				break;
   3282 			case 16:
   3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3287 				break;
   3288 			case 17:
   3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3293 				break;
   3294 			case 27:
   3295 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3296 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3297 						 PIPE_CONFIG(ADDR_SURF_P2));
   3298 				break;
   3299 			case 28:
   3300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3302 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3304 				break;
   3305 			case 29:
   3306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3308 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3310 				break;
   3311 			case 30:
   3312 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   3313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3314 						 PIPE_CONFIG(ADDR_SURF_P2) |
   3315 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3316 				break;
   3317 			default:
   3318 				gb_tile_moden = 0;
   3319 				break;
   3320 			}
   3321 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
   3322 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   3323 		}
   3324 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
   3325 			switch (reg_offset) {
   3326 			case 0:
   3327 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3328 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3329 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3330 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3331 				break;
   3332 			case 1:
   3333 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3334 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3335 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3336 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3337 				break;
   3338 			case 2:
   3339 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3340 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3341 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3342 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3343 				break;
   3344 			case 3:
   3345 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3348 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3349 				break;
   3350 			case 4:
   3351 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3354 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3355 				break;
   3356 			case 5:
   3357 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3358 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3359 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3360 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3361 				break;
   3362 			case 6:
   3363 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3364 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3365 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3366 						 NUM_BANKS(ADDR_SURF_8_BANK));
   3367 				break;
   3368 			case 8:
   3369 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3370 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   3371 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3372 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3373 				break;
   3374 			case 9:
   3375 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3376 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3377 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3378 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3379 				break;
   3380 			case 10:
   3381 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3382 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3383 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3384 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3385 				break;
   3386 			case 11:
   3387 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3388 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3389 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3390 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3391 				break;
   3392 			case 12:
   3393 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3394 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3395 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3396 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3397 				break;
   3398 			case 13:
   3399 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3400 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3401 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3402 						 NUM_BANKS(ADDR_SURF_16_BANK));
   3403 				break;
   3404 			case 14:
   3405 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3406 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3407 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3408 						 NUM_BANKS(ADDR_SURF_8_BANK));
   3409 				break;
   3410 			default:
   3411 				gb_tile_moden = 0;
   3412 				break;
   3413 			}
   3414 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
   3415 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
   3416 		}
   3417 	} else
   3418 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
   3419 }
   3420 
   3421 /**
   3422  * cik_select_se_sh - select which SE, SH to address
   3423  *
   3424  * @rdev: radeon_device pointer
   3425  * @se_num: shader engine to address
   3426  * @sh_num: sh block to address
   3427  *
   3428  * Select which SE, SH combinations to address. Certain
   3429  * registers are instanced per SE or SH.  0xffffffff means
   3430  * broadcast to all SEs or SHs (CIK).
   3431  */
   3432 static void cik_select_se_sh(struct radeon_device *rdev,
   3433 			     u32 se_num, u32 sh_num)
   3434 {
   3435 	u32 data = INSTANCE_BROADCAST_WRITES;
   3436 
   3437 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
   3438 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
   3439 	else if (se_num == 0xffffffff)
   3440 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
   3441 	else if (sh_num == 0xffffffff)
   3442 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
   3443 	else
   3444 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
   3445 	WREG32(GRBM_GFX_INDEX, data);
   3446 }
   3447 
   3448 /**
   3449  * cik_create_bitmask - create a bitmask
   3450  *
   3451  * @bit_width: length of the mask
   3452  *
   3453  * create a variable length bit mask (CIK).
   3454  * Returns the bitmask.
   3455  */
   3456 static u32 cik_create_bitmask(u32 bit_width)
   3457 {
   3458 	u32 i, mask = 0;
   3459 
   3460 	for (i = 0; i < bit_width; i++) {
   3461 		mask <<= 1;
   3462 		mask |= 1;
   3463 	}
   3464 	return mask;
   3465 }
   3466 
   3467 /**
   3468  * cik_get_rb_disabled - computes the mask of disabled RBs
   3469  *
   3470  * @rdev: radeon_device pointer
   3471  * @max_rb_num: max RBs (render backends) for the asic
   3472  * @se_num: number of SEs (shader engines) for the asic
   3473  * @sh_per_se: number of SH blocks per SE for the asic
   3474  *
   3475  * Calculates the bitmask of disabled RBs (CIK).
   3476  * Returns the disabled RB bitmask.
   3477  */
   3478 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
   3479 			      u32 max_rb_num_per_se,
   3480 			      u32 sh_per_se)
   3481 {
   3482 	u32 data, mask;
   3483 
   3484 	data = RREG32(CC_RB_BACKEND_DISABLE);
   3485 	if (data & 1)
   3486 		data &= BACKEND_DISABLE_MASK;
   3487 	else
   3488 		data = 0;
   3489 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
   3490 
   3491 	data >>= BACKEND_DISABLE_SHIFT;
   3492 
   3493 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
   3494 
   3495 	return data & mask;
   3496 }
   3497 
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH instance and pack its disabled-RB bits into one
	 * bitmap; Hawaii uses a wider per-SH field than the other CIK parts.
	 * grbm_idx_mutex serializes GRBM_GFX_INDEX bank selection.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled bitmap to get the enabled-RB mask. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE from the enabled-RB mask;
	 * enabled_rbs is consumed two bits at a time below, so the saved
	 * backend_enable_mask above keeps the full value.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
   3570 
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap __unused, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family hardware limits (shader engines, pipes, CUs, caches,
	 * scan-converter fifo sizes) and the "golden" GB_ADDR_CONFIG value.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* Derive DRAM row size (in KB) from the number of columns,
	 * capped at 4 KB.
	 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Mirror the address config into every block that needs it
	 * (HDP, display DMIF, both SDMA engines, UVD).
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count the active compute units across all SE/SH instances. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write to latch the HDP host path settings */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the configuration settle before using the engine */
	udelay(50);
}
   3826 
   3827 /*
   3828  * GPU scratch registers helpers function.
   3829  */
   3830 /**
   3831  * cik_scratch_init - setup driver info for CP scratch regs
   3832  *
   3833  * @rdev: radeon_device pointer
   3834  *
   3835  * Set up the number and offset of the CP scratch registers.
   3836  * NOTE: use of CP scratch registers is a legacy inferface and
   3837  * is not used by default on newer asics (r6xx+).  On newer asics,
   3838  * memory buffers are used for fences rather than scratch regs.
   3839  */
   3840 static void cik_scratch_init(struct radeon_device *rdev)
   3841 {
   3842 	int i;
   3843 
   3844 	rdev->scratch.num_reg = 7;
   3845 	rdev->scratch.reg_base = SCRATCH_REG0;
   3846 	for (i = 0; i < rdev->scratch.num_reg; i++) {
   3847 		rdev->scratch.free[i] = true;
   3848 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
   3849 	}
   3850 }
   3851 
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so we can tell whether the ring wrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Three-dword packet: have the CP write 0xDEADBEEF to the scratch reg. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll (up to usec_timeout microseconds) for the write to land. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
   3903 
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_DONE bit for this ring: CP0 for gfx,
	 * otherwise a per-pipe compute bit (CP2 base for ME 0, CP6 base
	 * for ME 1).
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown micro engine: nothing we can flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* Request an HDP flush and wait until the DONE bit matches. */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
   3948 
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy write: data select 1 (write seq), no interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real write: data select 1 (write seq), int select 2 (interrupt) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
   3989 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* data select 1 (write seq value), int select 2 (interrupt) */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
   4017 
   4018 /**
   4019  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
   4020  *
   4021  * @rdev: radeon_device pointer
   4022  * @ring: radeon ring buffer object
   4023  * @semaphore: radeon semaphore object
   4024  * @emit_wait: Is this a sempahore wait?
   4025  *
   4026  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
   4027  * from running ahead of semaphore waits.
   4028  */
   4029 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
   4030 			     struct radeon_ring *ring,
   4031 			     struct radeon_semaphore *semaphore,
   4032 			     bool emit_wait)
   4033 {
   4034 	uint64_t addr = semaphore->gpu_addr;
   4035 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
   4036 
   4037 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
   4038 	radeon_ring_write(ring, lower_32_bits(addr));
   4039 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
   4040 
   4041 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
   4042 		/* Prevent the PFP from running ahead of the semaphore wait */
   4043 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   4044 		radeon_ring_write(ring, 0x0);
   4045 	}
   4046 
   4047 	return true;
   4048 }
   4049 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	/* NOTE(review): uses copy.blit_ring_index — presumably CP DMA is
	 * registered as the blit engine in the asic struct; verify against
	 * radeon_asic setup.
	 */
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* One DMA_DATA packet moves at most 0x1fffff bytes; each loop
	 * iteration emits 7 dwords, plus 18 dwords of sync/fence overhead.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk carries CP_SYNC */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
   4121 
   4122 /*
   4123  * IB stuff
   4124  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* Record the rptr value that follows this submission, either
		 * via the rptr save register (3-dword packet) or the writeback
		 * buffer (5-dword packet); the +4 accounts for the IB packet
		 * emitted at the bottom of this function.
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
   4176 
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so we can tell whether the IB wrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Three-dword IB: write 0xDEADBEEF to the scratch register. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* Wait for the IB's fence, then poll for the scratch write to land. */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
   4242 
   4243 /*
   4244  * CP.
 * On CIK, gfx and compute now have independent command processors.
   4246  *
   4247  * GFX
   4248  * Gfx consists of a single ring and can process both gfx jobs and
   4249  * compute jobs.  The gfx CP consists of three microengines (ME):
   4250  * PFP - Pre-Fetch Parser
   4251  * ME - Micro Engine
   4252  * CE - Constant Engine
   4253  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
   4255  * used by the DE so that they can be loaded into cache in parallel
   4256  * while the DE is processing state update packets.
   4257  *
   4258  * Compute
   4259  * The compute CP consists of two microengines (ME):
   4260  * MEC1 - Compute MicroEngine 1
   4261  * MEC2 - Compute MicroEngine 2
   4262  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
   4263  * The queues are exposed to userspace and are programmed directly
   4264  * by the compute runtime.
   4265  */
   4266 /**
   4267  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
   4268  *
   4269  * @rdev: radeon_device pointer
   4270  * @enable: enable or disable the MEs
   4271  *
   4272  * Halts or unhalts the gfx MEs.
   4273  */
   4274 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
   4275 {
   4276 	if (enable)
   4277 		WREG32(CP_ME_CNTL, 0);
   4278 	else {
   4279 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
   4280 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
   4281 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
   4282 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
   4283 	}
   4284 	udelay(50);
   4285 }
   4286 
   4287 /**
   4288  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
   4289  *
   4290  * @rdev: radeon_device pointer
   4291  *
   4292  * Loads the gfx PFP, ME, and CE ucode.
   4293  * Returns 0 for success, -EINVAL if the ucode is not available.
   4294  */
   4295 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
   4296 {
   4297 	int i;
   4298 
   4299 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
   4300 		return -EINVAL;
   4301 
   4302 	cik_cp_gfx_enable(rdev, false);
   4303 
   4304 	if (rdev->new_fw) {
   4305 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
   4306 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
   4307 		const struct gfx_firmware_header_v1_0 *ce_hdr =
   4308 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
   4309 		const struct gfx_firmware_header_v1_0 *me_hdr =
   4310 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
   4311 		const __le32 *fw_data;
   4312 		u32 fw_size;
   4313 
   4314 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
   4315 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
   4316 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
   4317 
   4318 		/* PFP */
   4319 		fw_data = (const __le32 *)
   4320 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
   4321 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
   4322 		WREG32(CP_PFP_UCODE_ADDR, 0);
   4323 		for (i = 0; i < fw_size; i++)
   4324 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
   4325 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
   4326 
   4327 		/* CE */
   4328 		fw_data = (const __le32 *)
   4329 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
   4330 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
   4331 		WREG32(CP_CE_UCODE_ADDR, 0);
   4332 		for (i = 0; i < fw_size; i++)
   4333 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
   4334 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
   4335 
   4336 		/* ME */
   4337 		fw_data = (const __be32 *)
   4338 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
   4339 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
   4340 		WREG32(CP_ME_RAM_WADDR, 0);
   4341 		for (i = 0; i < fw_size; i++)
   4342 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
   4343 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
   4344 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
   4345 	} else {
   4346 		const __be32 *fw_data;
   4347 
   4348 		/* PFP */
   4349 		fw_data = (const __be32 *)rdev->pfp_fw->data;
   4350 		WREG32(CP_PFP_UCODE_ADDR, 0);
   4351 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
   4352 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
   4353 		WREG32(CP_PFP_UCODE_ADDR, 0);
   4354 
   4355 		/* CE */
   4356 		fw_data = (const __be32 *)rdev->ce_fw->data;
   4357 		WREG32(CP_CE_UCODE_ADDR, 0);
   4358 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
   4359 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
   4360 		WREG32(CP_CE_UCODE_ADDR, 0);
   4361 
   4362 		/* ME */
   4363 		fw_data = (const __be32 *)rdev->me_fw->data;
   4364 		WREG32(CP_ME_RAM_WADDR, 0);
   4365 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
   4366 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
   4367 		WREG32(CP_ME_RAM_WADDR, 0);
   4368 	}
   4369 
   4370 	return 0;
   4371 }
   4372 
   4373 /**
   4374  * cik_cp_gfx_start - start the gfx ring
   4375  *
   4376  * @rdev: radeon_device pointer
   4377  *
   4378  * Enables the ring and loads the clear state context and other
   4379  * packets required to init the ring.
   4380  * Returns 0 for success, error for failure.
   4381  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 = dwords of the fixed packets emitted below, on top of the
	 * cik_default_state payload */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state between the clear-state preamble
	 * markers */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
   4433 
   4434 /**
   4435  * cik_cp_gfx_fini - stop the gfx ring
   4436  *
   4437  * @rdev: radeon_device pointer
   4438  *
   4439  * Stop the gfx ring and tear down the driver ring
   4440  * info.
   4441  */
   4442 static void cik_cp_gfx_fini(struct radeon_device *rdev)
   4443 {
   4444 	cik_cp_gfx_enable(rdev, false);
   4445 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
   4446 }
   4447 
   4448 /**
   4449  * cik_cp_gfx_resume - setup the gfx ring buffer registers
   4450  *
   4451  * @rdev: radeon_device pointer
   4452  *
   4453  * Program the location and size of the gfx ring buffer
   4454  * and test it to make sure it's working.
   4455  * Returns 0 for success, error for failure.
   4456  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	/* point SCRATCH at its writeback slot (256-byte aligned, hence >> 8) */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* let the pointer reset settle before re-arming CNTL */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back: restore the full VRAM window for copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
   4523 
   4524 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
   4525 		     struct radeon_ring *ring)
   4526 {
   4527 	u32 rptr;
   4528 
   4529 	if (rdev->wb.enabled)
   4530 		rptr = rdev->wb.wb[ring->rptr_offs/4];
   4531 	else
   4532 		rptr = RREG32(CP_RB0_RPTR);
   4533 
   4534 	return rptr;
   4535 }
   4536 
   4537 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
   4538 		     struct radeon_ring *ring)
   4539 {
   4540 	u32 wptr;
   4541 
   4542 	wptr = RREG32(CP_RB0_WPTR);
   4543 
   4544 	return wptr;
   4545 }
   4546 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);	/* posting read to flush the write */
}
   4553 
   4554 u32 cik_compute_get_rptr(struct radeon_device *rdev,
   4555 			 struct radeon_ring *ring)
   4556 {
   4557 	u32 rptr;
   4558 
   4559 	if (rdev->wb.enabled) {
   4560 		rptr = rdev->wb.wb[ring->rptr_offs/4];
   4561 	} else {
   4562 		mutex_lock(&rdev->srbm_mutex);
   4563 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
   4564 		rptr = RREG32(CP_HQD_PQ_RPTR);
   4565 		cik_srbm_select(rdev, 0, 0, 0, 0);
   4566 		mutex_unlock(&rdev->srbm_mutex);
   4567 	}
   4568 
   4569 	return rptr;
   4570 }
   4571 
   4572 u32 cik_compute_get_wptr(struct radeon_device *rdev,
   4573 			 struct radeon_ring *ring)
   4574 {
   4575 	u32 wptr;
   4576 
   4577 	if (rdev->wb.enabled) {
   4578 		/* XXX check if swapping is necessary on BE */
   4579 		wptr = rdev->wb.wb[ring->wptr_offs/4];
   4580 	} else {
   4581 		mutex_lock(&rdev->srbm_mutex);
   4582 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
   4583 		wptr = RREG32(CP_HQD_PQ_WPTR);
   4584 		cik_srbm_select(rdev, 0, 0, 0, 0);
   4585 		mutex_unlock(&rdev->srbm_mutex);
   4586 	}
   4587 
   4588 	return wptr;
   4589 }
   4590 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* publish the new wptr in the writeback slot, then ring the
	 * queue's doorbell so the CP notices it */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
   4598 
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* caller (cik_cp_compute_enable) holds srbm_mutex across this call */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and wait (bounded by usec_timeout)
		 * for the queue to go inactive */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
   4623 
   4624 /**
   4625  * cik_cp_compute_enable - enable/disable the compute CP MEs
   4626  *
   4627  * @rdev: radeon_device pointer
   4628  * @enable: enable or disable the MEs
   4629  *
   4630  * Halts or unhalts the compute MEs.
   4631  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both MECs and mark the compute rings unusable */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
   4652 
   4653 /**
   4654  * cik_cp_compute_load_microcode - load the compute CP ME ucode
   4655  *
   4656  * @rdev: radeon_device pointer
   4657  *
   4658  * Loads the compute MEC1&2 ucode.
   4659  * Returns 0 for success, -EINVAL if the ucode is not available.
   4660  */
   4661 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
   4662 {
   4663 	int i;
   4664 
   4665 	if (!rdev->mec_fw)
   4666 		return -EINVAL;
   4667 
   4668 	cik_cp_compute_enable(rdev, false);
   4669 
   4670 	if (rdev->new_fw) {
   4671 		const struct gfx_firmware_header_v1_0 *mec_hdr =
   4672 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
   4673 		const __le32 *fw_data;
   4674 		u32 fw_size;
   4675 
   4676 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
   4677 
   4678 		/* MEC1 */
   4679 		fw_data = (const __le32 *)
   4680 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   4681 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
   4682 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4683 		for (i = 0; i < fw_size; i++)
   4684 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
   4685 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
   4686 
   4687 		/* MEC2 */
   4688 		if (rdev->family == CHIP_KAVERI) {
   4689 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
   4690 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
   4691 
   4692 			fw_data = (const __le32 *)
   4693 				(rdev->mec2_fw->data +
   4694 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
   4695 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
   4696 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4697 			for (i = 0; i < fw_size; i++)
   4698 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
   4699 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
   4700 		}
   4701 	} else {
   4702 		const __be32 *fw_data;
   4703 
   4704 		/* MEC1 */
   4705 		fw_data = (const __be32 *)rdev->mec_fw->data;
   4706 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4707 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
   4708 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
   4709 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4710 
   4711 		if (rdev->family == CHIP_KAVERI) {
   4712 			/* MEC2 */
   4713 			fw_data = (const __be32 *)rdev->mec_fw->data;
   4714 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4715 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
   4716 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
   4717 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4718 		}
   4719 	}
   4720 
   4721 	return 0;
   4722 }
   4723 
   4724 /**
   4725  * cik_cp_compute_start - start the compute queues
   4726  *
   4727  * @rdev: radeon_device pointer
   4728  *
   4729  * Enable the compute queues.
   4730  * Returns 0 for success, error for failure.
   4731  */
   4732 static int cik_cp_compute_start(struct radeon_device *rdev)
   4733 {
   4734 	cik_cp_compute_enable(rdev, true);
   4735 
   4736 	return 0;
   4737 }
   4738 
   4739 /**
   4740  * cik_cp_compute_fini - stop the compute queues
   4741  *
   4742  * @rdev: radeon_device pointer
   4743  *
   4744  * Stop the compute queues and tear down the driver queue
   4745  * info.
   4746  */
   4747 static void cik_cp_compute_fini(struct radeon_device *rdev)
   4748 {
   4749 	int i, idx, r;
   4750 
   4751 	cik_cp_compute_enable(rdev, false);
   4752 
   4753 	for (i = 0; i < 2; i++) {
   4754 		if (i == 0)
   4755 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
   4756 		else
   4757 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
   4758 
   4759 		if (rdev->ring[idx].mqd_obj) {
   4760 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
   4761 			if (unlikely(r != 0))
   4762 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
   4763 
   4764 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
   4765 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
   4766 
   4767 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
   4768 			rdev->ring[idx].mqd_obj = NULL;
   4769 		}
   4770 	}
   4771 }
   4772 
   4773 static void cik_mec_fini(struct radeon_device *rdev)
   4774 {
   4775 	int r;
   4776 
   4777 	if (rdev->mec.hpd_eop_obj) {
   4778 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
   4779 		if (unlikely(r != 0))
   4780 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
   4781 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
   4782 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
   4783 
   4784 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
   4785 		rdev->mec.hpd_eop_obj = NULL;
   4786 	}
   4787 }
   4788 
   4789 #define MEC_HPD_SIZE 2048
   4790 
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the HPD EOP buffer in GTT if not already present
	 * (NOTE(review): the "* 2" size factor matches the memset below —
	 * presumably extra slack per pipe, confirm against MEC docs) */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	/* pin it and map it so it can be cleared; cik_mec_fini() undoes
	 * the allocation on any failure below */
	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
   4845 
/*
 * CPU-side shadow of the CP hardware queue descriptor (HQD) and MQD
 * register values; fields mirror the CP_HQD_xxx / CP_MQD_xxx registers
 * programmed in cik_cp_compute_resume().
 * NOTE(review): this layout is embedded in struct bonaire_mqd, which the
 * CP/MEC firmware reads — do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
   4884 
/*
 * Memory Queue Descriptor (MQD) layout for Bonaire-class compute queues.
 * cik_cp_compute_resume() zero-fills one of these per compute ring in a
 * pinned GTT buffer, sets header = 0xC0310800, the static thread
 * management masks to all-ones, and records the programmed HQD register
 * values in queue_state.
 * NOTE(review): layout is consumed by the CP/MEC firmware — do not
 * reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
   4912 
   4913 /**
   4914  * cik_cp_compute_resume - setup the compute queue registers
   4915  *
   4916  * @rdev: radeon_device pointer
   4917  *
   4918  * Program the compute queues and test them to make sure they
   4919  * are working.
   4920  * Returns 0 for success, error for failure.
   4921  */
   4922 static int cik_cp_compute_resume(struct radeon_device *rdev)
   4923 {
   4924 	int r, i, j, idx;
   4925 	u32 tmp;
   4926 	bool use_doorbell = true;
   4927 	u64 hqd_gpu_addr;
   4928 	u64 mqd_gpu_addr;
   4929 	u64 eop_gpu_addr;
   4930 	u64 wb_gpu_addr;
   4931 	u32 *buf;
   4932 	struct bonaire_mqd *mqd;
   4933 
   4934 	r = cik_cp_compute_start(rdev);
   4935 	if (r)
   4936 		return r;
   4937 
   4938 	/* fix up chicken bits */
   4939 	tmp = RREG32(CP_CPF_DEBUG);
   4940 	tmp |= (1 << 23);
   4941 	WREG32(CP_CPF_DEBUG, tmp);
   4942 
   4943 	/* init the pipes */
   4944 	mutex_lock(&rdev->srbm_mutex);
   4945 
   4946 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
   4947 
   4948 	cik_srbm_select(rdev, 0, 0, 0, 0);
   4949 
   4950 	/* write the EOP addr */
   4951 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
   4952 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
   4953 
   4954 	/* set the VMID assigned */
   4955 	WREG32(CP_HPD_EOP_VMID, 0);
   4956 
   4957 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   4958 	tmp = RREG32(CP_HPD_EOP_CONTROL);
   4959 	tmp &= ~EOP_SIZE_MASK;
   4960 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
   4961 	WREG32(CP_HPD_EOP_CONTROL, tmp);
   4962 
   4963 	mutex_unlock(&rdev->srbm_mutex);
   4964 
   4965 	/* init the queues.  Just two for now. */
   4966 	for (i = 0; i < 2; i++) {
   4967 		if (i == 0)
   4968 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
   4969 		else
   4970 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
   4971 
   4972 		if (rdev->ring[idx].mqd_obj == NULL) {
   4973 			r = radeon_bo_create(rdev,
   4974 					     sizeof(struct bonaire_mqd),
   4975 					     PAGE_SIZE, true,
   4976 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
   4977 					     NULL, &rdev->ring[idx].mqd_obj);
   4978 			if (r) {
   4979 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
   4980 				return r;
   4981 			}
   4982 		}
   4983 
   4984 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
   4985 		if (unlikely(r != 0)) {
   4986 			cik_cp_compute_fini(rdev);
   4987 			return r;
   4988 		}
   4989 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
   4990 				  &mqd_gpu_addr);
   4991 		if (r) {
   4992 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
   4993 			cik_cp_compute_fini(rdev);
   4994 			return r;
   4995 		}
   4996 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
   4997 		if (r) {
   4998 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
   4999 			cik_cp_compute_fini(rdev);
   5000 			return r;
   5001 		}
   5002 
   5003 		/* init the mqd struct */
   5004 		memset(buf, 0, sizeof(struct bonaire_mqd));
   5005 
   5006 		mqd = (struct bonaire_mqd *)buf;
   5007 		mqd->header = 0xC0310800;
   5008 		mqd->static_thread_mgmt01[0] = 0xffffffff;
   5009 		mqd->static_thread_mgmt01[1] = 0xffffffff;
   5010 		mqd->static_thread_mgmt23[0] = 0xffffffff;
   5011 		mqd->static_thread_mgmt23[1] = 0xffffffff;
   5012 
   5013 		mutex_lock(&rdev->srbm_mutex);
   5014 		cik_srbm_select(rdev, rdev->ring[idx].me,
   5015 				rdev->ring[idx].pipe,
   5016 				rdev->ring[idx].queue, 0);
   5017 
   5018 		/* disable wptr polling */
   5019 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
   5020 		tmp &= ~WPTR_POLL_EN;
   5021 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
   5022 
   5023 		/* enable doorbell? */
   5024 		mqd->queue_state.cp_hqd_pq_doorbell_control =
   5025 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
   5026 		if (use_doorbell)
   5027 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
   5028 		else
   5029 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
   5030 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
   5031 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
   5032 
   5033 		/* disable the queue if it's active */
   5034 		mqd->queue_state.cp_hqd_dequeue_request = 0;
   5035 		mqd->queue_state.cp_hqd_pq_rptr = 0;
   5036 		mqd->queue_state.cp_hqd_pq_wptr= 0;
   5037 		if (RREG32(CP_HQD_ACTIVE) & 1) {
   5038 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
   5039 			for (j = 0; j < rdev->usec_timeout; j++) {
   5040 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
   5041 					break;
   5042 				udelay(1);
   5043 			}
   5044 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
   5045 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
   5046 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
   5047 		}
   5048 
   5049 		/* set the pointer to the MQD */
   5050 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
   5051 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
   5052 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
   5053 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
   5054 		/* set MQD vmid to 0 */
   5055 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
   5056 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
   5057 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
   5058 
   5059 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
   5060 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
   5061 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
   5062 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   5063 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
   5064 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
   5065 
   5066 		/* set up the HQD, this is similar to CP_RB0_CNTL */
   5067 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
   5068 		mqd->queue_state.cp_hqd_pq_control &=
   5069 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
   5070 
   5071 		mqd->queue_state.cp_hqd_pq_control |=
   5072 			order_base_2(rdev->ring[idx].ring_size / 8);
   5073 		mqd->queue_state.cp_hqd_pq_control |=
   5074 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
   5075 #ifdef __BIG_ENDIAN
   5076 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
   5077 #endif
   5078 		mqd->queue_state.cp_hqd_pq_control &=
   5079 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
   5080 		mqd->queue_state.cp_hqd_pq_control |=
   5081 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
   5082 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
   5083 
   5084 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
   5085 		if (i == 0)
   5086 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
   5087 		else
   5088 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
   5089 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
   5090 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   5091 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
   5092 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
   5093 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
   5094 
		/* set the wb address whether it's enabled or not */
   5096 		if (i == 0)
   5097 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
   5098 		else
   5099 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
   5100 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
   5101 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
   5102 			upper_32_bits(wb_gpu_addr) & 0xffff;
   5103 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
   5104 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
   5105 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
   5106 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
   5107 
   5108 		/* enable the doorbell if requested */
   5109 		if (use_doorbell) {
   5110 			mqd->queue_state.cp_hqd_pq_doorbell_control =
   5111 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
   5112 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
   5113 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
   5114 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
   5115 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
   5116 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
   5117 				~(DOORBELL_SOURCE | DOORBELL_HIT);
   5118 
   5119 		} else {
   5120 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
   5121 		}
   5122 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
   5123 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
   5124 
   5125 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   5126 		rdev->ring[idx].wptr = 0;
   5127 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
   5128 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
   5129 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
   5130 
   5131 		/* set the vmid for the queue */
   5132 		mqd->queue_state.cp_hqd_vmid = 0;
   5133 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
   5134 
   5135 		/* activate the queue */
   5136 		mqd->queue_state.cp_hqd_active = 1;
   5137 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
   5138 
   5139 		cik_srbm_select(rdev, 0, 0, 0, 0);
   5140 		mutex_unlock(&rdev->srbm_mutex);
   5141 
   5142 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
   5143 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
   5144 
   5145 		rdev->ring[idx].ready = true;
   5146 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
   5147 		if (r)
   5148 			rdev->ring[idx].ready = false;
   5149 	}
   5150 
   5151 	return 0;
   5152 }
   5153 
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to halt
 *
 * Enables or halts the gfx CP and then the compute CP (MEC).
 * Both helpers program hardware; keep the gfx-then-compute order.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
   5159 
/**
 * cik_cp_load_microcode - load the CP microcode blobs
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP microcode first and, only if that succeeded,
 * the compute (MEC) microcode.
 * Returns 0 on success, the first failing loader's error otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
   5173 
/**
 * cik_cp_fini - tear down both command processors
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx CP, then the compute CP (MEC).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
   5179 
   5180 static int cik_cp_resume(struct radeon_device *rdev)
   5181 {
   5182 	int r;
   5183 
   5184 	cik_enable_gui_idle_interrupt(rdev, false);
   5185 
   5186 	r = cik_cp_load_microcode(rdev);
   5187 	if (r)
   5188 		return r;
   5189 
   5190 	r = cik_cp_gfx_resume(rdev);
   5191 	if (r)
   5192 		return r;
   5193 	r = cik_cp_compute_resume(rdev);
   5194 	if (r)
   5195 		return r;
   5196 
   5197 	cik_enable_gui_idle_interrupt(rdev, true);
   5198 
   5199 	return 0;
   5200 }
   5201 
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM/SRBM, SDMA and CP status registers to the kernel
 * log; used to help diagnose hangs around a GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
   5241 
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: gfx pipeline blocks */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: note SDMA_IDLE is inverted (idle, not busy) */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
   5322 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.  The sequence is
 * strictly ordered: halt the engines, quiesce the MC, pulse the
 * GRBM/SRBM soft reset bits, then restore the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before asserting the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP; /* already set above; kept for clarity */

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: assert, wait 50us, deassert.
	 * the read-backs after each write flush the posted write */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/deassert pulse for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
   5453 
/* GMCON register state saved across a KV/KB (IGP) pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* saved GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* saved GMCON_MISC */
	u32 gmcon_misc3;	/* saved GMCON_MISC3 */
};
   5459 
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3 and then masks off the
 * register-engine execute and stutter-mode bits so they stay
 * inactive while the reset runs.  kv_restore_regs_for_reset()
 * writes the saved values back.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
   5471 
   5472 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
   5473 				      struct kv_reset_save_regs *save)
   5474 {
   5475 	int i;
   5476 
   5477 	WREG32(GMCON_PGFSM_WRITE, 0);
   5478 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
   5479 
   5480 	for (i = 0; i < 5; i++)
   5481 		WREG32(GMCON_PGFSM_WRITE, 0);
   5482 
   5483 	WREG32(GMCON_PGFSM_WRITE, 0);
   5484 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
   5485 
   5486 	for (i = 0; i < 5; i++)
   5487 		WREG32(GMCON_PGFSM_WRITE, 0);
   5488 
   5489 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
   5490 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
   5491 
   5492 	for (i = 0; i < 5; i++)
   5493 		WREG32(GMCON_PGFSM_WRITE, 0);
   5494 
   5495 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
   5496 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
   5497 
   5498 	for (i = 0; i < 5; i++)
   5499 		WREG32(GMCON_PGFSM_WRITE, 0);
   5500 
   5501 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
   5502 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
   5503 
   5504 	for (i = 0; i < 5; i++)
   5505 		WREG32(GMCON_PGFSM_WRITE, 0);
   5506 
   5507 	WREG32(GMCON_PGFSM_WRITE, 0);
   5508 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
   5509 
   5510 	for (i = 0; i < 5; i++)
   5511 		WREG32(GMCON_PGFSM_WRITE, 0);
   5512 
   5513 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
   5514 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
   5515 
   5516 	for (i = 0; i < 5; i++)
   5517 		WREG32(GMCON_PGFSM_WRITE, 0);
   5518 
   5519 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
   5520 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
   5521 
   5522 	for (i = 0; i < 5; i++)
   5523 		WREG32(GMCON_PGFSM_WRITE, 0);
   5524 
   5525 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
   5526 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
   5527 
   5528 	for (i = 0; i < 5; i++)
   5529 		WREG32(GMCON_PGFSM_WRITE, 0);
   5530 
   5531 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
   5532 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
   5533 
   5534 	for (i = 0; i < 5; i++)
   5535 		WREG32(GMCON_PGFSM_WRITE, 0);
   5536 
   5537 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
   5538 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
   5539 
   5540 	WREG32(GMCON_MISC3, save->gmcon_misc3);
   5541 	WREG32(GMCON_MISC, save->gmcon_misc);
   5542 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
   5543 }
   5544 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the engines, quiesces the memory controller, then performs
 * a full asic reset through PCI config space.  On IGPs the GMCON
 * state is saved before and restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
   5607 
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the BIOS the engine is hung while we try to recover */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* only clear the hung flag if all blocks are idle again;
	 * note this function returns 0 even when blocks remain busy */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
   5642 
   5643 /**
   5644  * cik_gfx_is_lockup - check if the 3D engine is locked up
   5645  *
   5646  * @rdev: radeon_device pointer
   5647  * @ring: radeon_ring structure holding ring information
   5648  *
   5649  * Check if the 3D engine is locked up (CIK).
   5650  * Returns true if the engine is locked, false if not.
   5651  */
   5652 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
   5653 {
   5654 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
   5655 
   5656 	if (!(reset_mask & (RADEON_RESET_GFX |
   5657 			    RADEON_RESET_COMPUTE |
   5658 			    RADEON_RESET_CP))) {
   5659 		radeon_ring_lockup_update(rdev, ring);
   5660 		return false;
   5661 	}
   5662 	return radeon_ring_test_lockup(rdev, ring);
   5663 }
   5664 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end (>>24, 16MB units) in the high half,
	 * start in the low half */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0 with top == bottom, i.e. empty */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
   5721 
   5722 /**
   5723  * cik_mc_init - initialize the memory controller driver params
   5724  *
   5725  * @rdev: radeon_device pointer
   5726  *
   5727  * Look up the amount of vram, vram width, and decide how to place
   5728  * vram and gart within the GPU's physical address space (CIK).
   5729  * Returns 0 for success.
   5730  */
   5731 static int cik_mc_init(struct radeon_device *rdev)
   5732 {
   5733 	u32 tmp;
   5734 	int chansize, numchan;
   5735 
   5736 	/* Get VRAM informations */
   5737 	rdev->mc.vram_is_ddr = true;
   5738 	tmp = RREG32(MC_ARB_RAMCFG);
   5739 	if (tmp & CHANSIZE_MASK) {
   5740 		chansize = 64;
   5741 	} else {
   5742 		chansize = 32;
   5743 	}
   5744 	tmp = RREG32(MC_SHARED_CHMAP);
   5745 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
   5746 	case 0:
   5747 	default:
   5748 		numchan = 1;
   5749 		break;
   5750 	case 1:
   5751 		numchan = 2;
   5752 		break;
   5753 	case 2:
   5754 		numchan = 4;
   5755 		break;
   5756 	case 3:
   5757 		numchan = 8;
   5758 		break;
   5759 	case 4:
   5760 		numchan = 3;
   5761 		break;
   5762 	case 5:
   5763 		numchan = 6;
   5764 		break;
   5765 	case 6:
   5766 		numchan = 10;
   5767 		break;
   5768 	case 7:
   5769 		numchan = 12;
   5770 		break;
   5771 	case 8:
   5772 		numchan = 16;
   5773 		break;
   5774 	}
   5775 	rdev->mc.vram_width = numchan * chansize;
   5776 	/* Could aper size report 0 ? */
   5777 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
   5778 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
   5779 	/* size in MB on si */
   5780 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
   5781 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
   5782 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
   5783 	si_vram_gtt_location(rdev, &rdev->mc);
   5784 	radeon_update_bandwidth_info(rdev);
   5785 
   5786 	return 0;
   5787 }
   5788 
   5789 /*
   5790  * GART
   5791  * VMID 0 is the physical GPU addresses as used by the kernel.
   5792  * VMIDs 1-15 are used for userspace clients and are handled
   5793  * by the radeon vm/hsa code.
   5794  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 is invalidated */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
   5810 
/**
 * cik_pcie_init_compute_vmid - program SH_MEM state for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs the SH_MEM registers for VMIDs 8-15 (the VMIDs used for
 * compute/amdkfd clients).  srbm_mutex serializes the SRBM VMID
 * selection against other users.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* 0x6000 in both halves of SH_MEM_BASES */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* return the SRBM to VMID 0 before releasing the lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
   5832 
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: covers the GTT range, faults go to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* undocumented registers; zeroed as in the upstream driver */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* bases saved earlier by cik_pcie_gart_disable() */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
   5955 
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-context page table bases so that
	 * cik_pcie_gart_enable() can restore contexts 1-15 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
   5994 
   5995 /**
   5996  * cik_pcie_gart_fini - vm fini callback
   5997  *
   5998  * @rdev: radeon_device pointer
   5999  *
   6000  * Tears down the driver GART/VM setup (CIK).
   6001  */
   6002 static void cik_pcie_gart_fini(struct radeon_device *rdev)
   6003 {
   6004 	cik_pcie_gart_disable(rdev);
   6005 	radeon_gart_table_vram_free(rdev);
   6006 	radeon_gart_fini(rdev);
   6007 }
   6008 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
   6022 
   6023 /*
   6024  * vm
   6025  * VMID 0 is the physical GPU addresses as used by the kernel.
   6026  * VMIDs 1-15 are used for userspace clients and are handled
   6027  * by the radeon vm/hsa code.
   6028  */
   6029 /**
   6030  * cik_vm_init - cik vm init callback
   6031  *
   6032  * @rdev: radeon_device pointer
   6033  *
   6034  * Inits cik specific vm parameters (number of VMs, base of vram for
   6035  * VMIDs 1-15) (CIK).
   6036  * Returns 0 for success.
   6037  */
   6038 int cik_vm_init(struct radeon_device *rdev)
   6039 {
   6040 	/*
   6041 	 * number of VMs
   6042 	 * VMID 0 is reserved for System
   6043 	 * radeon graphics/compute will use VMIDs 1-7
   6044 	 * amdkfd will use VMIDs 8-15
   6045 	 */
   6046 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
   6047 	/* base offset of vram pages */
   6048 	if (rdev->flags & RADEON_IS_IGP) {
   6049 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
   6050 		tmp <<= 22;
   6051 		rdev->vm_manager.vram_base_offset = tmp;
   6052 	} else
   6053 		rdev->vm_manager.vram_base_offset = 0;
   6054 
   6055 	return 0;
   6056 }
   6057 
   6058 /**
   6059  * cik_vm_fini - cik vm fini callback
   6060  *
   6061  * @rdev: radeon_device pointer
   6062  *
   6063  * Tear down any asic specific VM setup (CIK).
   6064  */
   6065 void cik_vm_fini(struct radeon_device *rdev)
   6066 {
   6067 }
   6068 
   6069 /**
   6070  * cik_vm_decode_fault - print human readable fault info
   6071  *
   6072  * @rdev: radeon_device pointer
   6073  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
   6074  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
   6075  *
   6076  * Print human readable fault information (CIK).
   6077  */
   6078 static void cik_vm_decode_fault(struct radeon_device *rdev,
   6079 				u32 status, u32 addr, u32 mc_client)
   6080 {
   6081 	u32 mc_id;
   6082 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
   6083 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
   6084 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
   6085 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
   6086 
   6087 	if (rdev->family == CHIP_HAWAII)
   6088 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
   6089 	else
   6090 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
   6091 
   6092 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
   6093 	       protections, vmid, addr,
   6094 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
   6095 	       block, mc_client, mc_id);
   6096 }
   6097 
   6098 /**
   6099  * cik_vm_flush - cik vm flush using the CP
   6100  *
   6101  * @rdev: radeon_device pointer
   6102  *
   6103  * Update the page table base and flush the VM TLB
   6104  * using the CP (CIK).
   6105  */
   6106 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
   6107 		  unsigned vm_id, uint64_t pd_addr)
   6108 {
   6109 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
   6110 
   6111 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6112 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   6113 				 WRITE_DATA_DST_SEL(0)));
   6114 	if (vm_id < 8) {
   6115 		radeon_ring_write(ring,
   6116 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
   6117 	} else {
   6118 		radeon_ring_write(ring,
   6119 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
   6120 	}
   6121 	radeon_ring_write(ring, 0);
   6122 	radeon_ring_write(ring, pd_addr >> 12);
   6123 
   6124 	/* update SH_MEM_* regs */
   6125 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6126 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   6127 				 WRITE_DATA_DST_SEL(0)));
   6128 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
   6129 	radeon_ring_write(ring, 0);
   6130 	radeon_ring_write(ring, VMID(vm_id));
   6131 
   6132 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
   6133 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   6134 				 WRITE_DATA_DST_SEL(0)));
   6135 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
   6136 	radeon_ring_write(ring, 0);
   6137 
   6138 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
   6139 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
   6140 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
   6141 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
   6142 
   6143 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6144 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   6145 				 WRITE_DATA_DST_SEL(0)));
   6146 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
   6147 	radeon_ring_write(ring, 0);
   6148 	radeon_ring_write(ring, VMID(0));
   6149 
   6150 	/* HDP flush */
   6151 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
   6152 
   6153 	/* bits 0-15 are the VM contexts0-15 */
   6154 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6155 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   6156 				 WRITE_DATA_DST_SEL(0)));
   6157 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
   6158 	radeon_ring_write(ring, 0);
   6159 	radeon_ring_write(ring, 1 << vm_id);
   6160 
   6161 	/* wait for the invalidate to complete */
   6162 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   6163 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
   6164 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
   6165 				 WAIT_REG_MEM_ENGINE(0))); /* me */
   6166 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
   6167 	radeon_ring_write(ring, 0);
   6168 	radeon_ring_write(ring, 0); /* ref */
   6169 	radeon_ring_write(ring, 0); /* mask */
   6170 	radeon_ring_write(ring, 0x20); /* poll interval */
   6171 
   6172 	/* compute doesn't have PFP */
   6173 	if (usepfp) {
   6174 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   6175 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   6176 		radeon_ring_write(ring, 0x0);
   6177 	}
   6178 }
   6179 
   6180 /*
   6181  * RLC
   6182  * The RLC is a multi-purpose microengine that handles a
   6183  * variety of functions, the most important of which is
   6184  * the interrupt controller.
   6185  */
   6186 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
   6187 					  bool enable)
   6188 {
   6189 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
   6190 
   6191 	if (enable)
   6192 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   6193 	else
   6194 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   6195 	WREG32(CP_INT_CNTL_RING0, tmp);
   6196 }
   6197 
   6198 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
   6199 {
   6200 	u32 tmp;
   6201 
   6202 	tmp = RREG32(RLC_LB_CNTL);
   6203 	if (enable)
   6204 		tmp |= LOAD_BALANCE_ENABLE;
   6205 	else
   6206 		tmp &= ~LOAD_BALANCE_ENABLE;
   6207 	WREG32(RLC_LB_CNTL, tmp);
   6208 }
   6209 
/* Wait (bounded by rdev->usec_timeout per poll) for the RLC serdes
 * masters to go idle: first the per-CU masters on every SE/SH, then
 * the non-CU masters.  Times out silently rather than erroring. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* grbm_idx_mutex serializes GRBM SE/SH index selection */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
   6236 
   6237 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
   6238 {
   6239 	u32 tmp;
   6240 
   6241 	tmp = RREG32(RLC_CNTL);
   6242 	if (tmp != rlc)
   6243 		WREG32(RLC_CNTL, rlc);
   6244 }
   6245 
   6246 static u32 cik_halt_rlc(struct radeon_device *rdev)
   6247 {
   6248 	u32 data, orig;
   6249 
   6250 	orig = data = RREG32(RLC_CNTL);
   6251 
   6252 	if (data & RLC_ENABLE) {
   6253 		u32 i;
   6254 
   6255 		data &= ~RLC_ENABLE;
   6256 		WREG32(RLC_CNTL, data);
   6257 
   6258 		for (i = 0; i < rdev->usec_timeout; i++) {
   6259 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
   6260 				break;
   6261 			udelay(1);
   6262 		}
   6263 
   6264 		cik_wait_for_rlc_serdes(rdev);
   6265 	}
   6266 
   6267 	return orig;
   6268 }
   6269 
/* Request RLC safe mode and wait (bounded polls) for the handshake:
 * first until GFX reports power+clock status, then until the RLC
 * clears the REQ bit acknowledging the message. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
   6290 
   6291 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
   6292 {
   6293 	u32 tmp;
   6294 
   6295 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
   6296 	WREG32(RLC_GPR_REG2, tmp);
   6297 }
   6298 
   6299 /**
   6300  * cik_rlc_stop - stop the RLC ME
   6301  *
   6302  * @rdev: radeon_device pointer
   6303  *
   6304  * Halt the RLC ME (MicroEngine) (CIK).
   6305  */
   6306 static void cik_rlc_stop(struct radeon_device *rdev)
   6307 {
   6308 	WREG32(RLC_CNTL, 0);
   6309 
   6310 	cik_enable_gui_idle_interrupt(rdev, false);
   6311 
   6312 	cik_wait_for_rlc_serdes(rdev);
   6313 }
   6314 
   6315 /**
   6316  * cik_rlc_start - start the RLC ME
   6317  *
   6318  * @rdev: radeon_device pointer
   6319  *
   6320  * Unhalt the RLC ME (MicroEngine) (CIK).
   6321  */
   6322 static void cik_rlc_start(struct radeon_device *rdev)
   6323 {
   6324 	WREG32(RLC_CNTL, RLC_ENABLE);
   6325 
   6326 	cik_enable_gui_idle_interrupt(rdev, true);
   6327 
   6328 	udelay(50);
   6329 }
   6330 
   6331 /**
   6332  * cik_rlc_resume - setup the RLC hw
   6333  *
   6334  * @rdev: radeon_device pointer
   6335  *
   6336  * Initialize the RLC registers, load the ucode,
   6337  * and start the RLC (CIK).
   6338  * Returns 0 for success, -EINVAL if the ucode is not available.
   6339  */
   6340 static int cik_rlc_resume(struct radeon_device *rdev)
   6341 {
   6342 	u32 i, size, tmp;
   6343 
   6344 	if (!rdev->rlc_fw)
   6345 		return -EINVAL;
   6346 
   6347 	cik_rlc_stop(rdev);
   6348 
   6349 	/* disable CG */
   6350 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
   6351 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
   6352 
   6353 	si_rlc_reset(rdev);
   6354 
   6355 	cik_init_pg(rdev);
   6356 
   6357 	cik_init_cg(rdev);
   6358 
   6359 	WREG32(RLC_LB_CNTR_INIT, 0);
   6360 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
   6361 
   6362 	mutex_lock(&rdev->grbm_idx_mutex);
   6363 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
   6364 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
   6365 	WREG32(RLC_LB_PARAMS, 0x00600408);
   6366 	WREG32(RLC_LB_CNTL, 0x80000004);
   6367 	mutex_unlock(&rdev->grbm_idx_mutex);
   6368 
   6369 	WREG32(RLC_MC_CNTL, 0);
   6370 	WREG32(RLC_UCODE_CNTL, 0);
   6371 
   6372 	if (rdev->new_fw) {
   6373 		const struct rlc_firmware_header_v1_0 *hdr =
   6374 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
   6375 		const __le32 *fw_data = (const __le32 *)
   6376 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   6377 
   6378 		radeon_ucode_print_rlc_hdr(&hdr->header);
   6379 
   6380 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   6381 		WREG32(RLC_GPM_UCODE_ADDR, 0);
   6382 		for (i = 0; i < size; i++)
   6383 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
   6384 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
   6385 	} else {
   6386 		const __be32 *fw_data;
   6387 
   6388 		switch (rdev->family) {
   6389 		case CHIP_BONAIRE:
   6390 		case CHIP_HAWAII:
   6391 		default:
   6392 			size = BONAIRE_RLC_UCODE_SIZE;
   6393 			break;
   6394 		case CHIP_KAVERI:
   6395 			size = KV_RLC_UCODE_SIZE;
   6396 			break;
   6397 		case CHIP_KABINI:
   6398 			size = KB_RLC_UCODE_SIZE;
   6399 			break;
   6400 		case CHIP_MULLINS:
   6401 			size = ML_RLC_UCODE_SIZE;
   6402 			break;
   6403 		}
   6404 
   6405 		fw_data = (const __be32 *)rdev->rlc_fw->data;
   6406 		WREG32(RLC_GPM_UCODE_ADDR, 0);
   6407 		for (i = 0; i < size; i++)
   6408 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
   6409 		WREG32(RLC_GPM_UCODE_ADDR, 0);
   6410 	}
   6411 
   6412 	/* XXX - find out what chips support lbpw */
   6413 	cik_enable_lbpw(rdev, false);
   6414 
   6415 	if (rdev->family == CHIP_BONAIRE)
   6416 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
   6417 
   6418 	cik_rlc_start(rdev);
   6419 
   6420 	return 0;
   6421 }
   6422 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS).  The serdes
 * programming must happen with the RLC halted; cik_halt_rlc() returns
 * the prior RLC_CNTL which cik_update_rlc() restores afterwards. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads act as a delay before disabling CG
		 * (matches upstream; exact rationale undocumented) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
   6460 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * CP/RLC memory light sleep and CGTS options, gated on the various
 * cg_flags.  Serdes writes require the RLC to be halted around them. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
   6544 
/* Memory-controller clock gating registers toggled as a group by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
   6557 
   6558 static void cik_enable_mc_ls(struct radeon_device *rdev,
   6559 			     bool enable)
   6560 {
   6561 	int i;
   6562 	u32 orig, data;
   6563 
   6564 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
   6565 		orig = data = RREG32(mc_cg_registers[i]);
   6566 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
   6567 			data |= MC_LS_ENABLE;
   6568 		else
   6569 			data &= ~MC_LS_ENABLE;
   6570 		if (data != orig)
   6571 			WREG32(mc_cg_registers[i], data);
   6572 	}
   6573 }
   6574 
   6575 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
   6576 			       bool enable)
   6577 {
   6578 	int i;
   6579 	u32 orig, data;
   6580 
   6581 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
   6582 		orig = data = RREG32(mc_cg_registers[i]);
   6583 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
   6584 			data |= MC_CG_ENABLE;
   6585 		else
   6586 			data &= ~MC_CG_ENABLE;
   6587 		if (data != orig)
   6588 			WREG32(mc_cg_registers[i], data);
   6589 	}
   6590 }
   6591 
   6592 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
   6593 				 bool enable)
   6594 {
   6595 	u32 orig, data;
   6596 
   6597 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
   6598 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
   6599 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
   6600 	} else {
   6601 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
   6602 		data |= 0xff000000;
   6603 		if (data != orig)
   6604 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
   6605 
   6606 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
   6607 		data |= 0xff000000;
   6608 		if (data != orig)
   6609 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
   6610 	}
   6611 }
   6612 
   6613 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
   6614 				 bool enable)
   6615 {
   6616 	u32 orig, data;
   6617 
   6618 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
   6619 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
   6620 		data |= 0x100;
   6621 		if (orig != data)
   6622 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
   6623 
   6624 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
   6625 		data |= 0x100;
   6626 		if (orig != data)
   6627 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
   6628 	} else {
   6629 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
   6630 		data &= ~0x100;
   6631 		if (orig != data)
   6632 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
   6633 
   6634 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
   6635 		data &= ~0x100;
   6636 		if (orig != data)
   6637 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
   6638 	}
   6639 }
   6640 
/* Enable/disable UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten; upstream does the same ("data = 0xfff"
		 * rather than "data |= 0xfff").  Left as-is to match
		 * the programmed hardware state. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the mem-gating bits and the dynamic clock mode */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
   6666 
   6667 static void cik_enable_bif_mgls(struct radeon_device *rdev,
   6668 			       bool enable)
   6669 {
   6670 	u32 orig, data;
   6671 
   6672 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
   6673 
   6674 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
   6675 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
   6676 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
   6677 	else
   6678 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
   6679 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
   6680 
   6681 	if (orig != data)
   6682 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
   6683 }
   6684 
   6685 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
   6686 				bool enable)
   6687 {
   6688 	u32 orig, data;
   6689 
   6690 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
   6691 
   6692 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
   6693 		data &= ~CLOCK_GATING_DIS;
   6694 	else
   6695 		data |= CLOCK_GATING_DIS;
   6696 
   6697 	if (orig != data)
   6698 		WREG32(HDP_HOST_PATH_CNTL, data);
   6699 }
   6700 
   6701 static void cik_enable_hdp_ls(struct radeon_device *rdev,
   6702 			      bool enable)
   6703 {
   6704 	u32 orig, data;
   6705 
   6706 	orig = data = RREG32(HDP_MEM_POWER_LS);
   6707 
   6708 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
   6709 		data |= HDP_LS_ENABLE;
   6710 	else
   6711 		data &= ~HDP_LS_ENABLE;
   6712 
   6713 	if (orig != data)
   6714 		WREG32(HDP_MEM_POWER_LS, data);
   6715 }
   6716 
/* Dispatch clock-gating enable/disable to each hardware block
 * selected in @block (a mask of RADEON_CG_BLOCK_* bits). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG must be on before CGCG */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* and CGCG off before MGCG */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is handled elsewhere on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
   6764 
/* Enable clock gating on all supported blocks at init time:
 * GFX first, then the remaining blocks as a group. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
   6779 
/* Disable clock gating on teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
   6790 
   6791 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
   6792 					  bool enable)
   6793 {
   6794 	u32 data, orig;
   6795 
   6796 	orig = data = RREG32(RLC_PG_CNTL);
   6797 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
   6798 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
   6799 	else
   6800 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
   6801 	if (orig != data)
   6802 		WREG32(RLC_PG_CNTL, data);
   6803 }
   6804 
   6805 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
   6806 					  bool enable)
   6807 {
   6808 	u32 data, orig;
   6809 
   6810 	orig = data = RREG32(RLC_PG_CNTL);
   6811 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
   6812 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
   6813 	else
   6814 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
   6815 	if (orig != data)
   6816 		WREG32(RLC_PG_CNTL, data);
   6817 }
   6818 
   6819 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
   6820 {
   6821 	u32 data, orig;
   6822 
   6823 	orig = data = RREG32(RLC_PG_CNTL);
   6824 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
   6825 		data &= ~DISABLE_CP_PG;
   6826 	else
   6827 		data |= DISABLE_CP_PG;
   6828 	if (orig != data)
   6829 		WREG32(RLC_PG_CNTL, data);
   6830 }
   6831 
   6832 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
   6833 {
   6834 	u32 data, orig;
   6835 
   6836 	orig = data = RREG32(RLC_PG_CNTL);
   6837 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
   6838 		data &= ~DISABLE_GDS_PG;
   6839 	else
   6840 		data |= DISABLE_GDS_PG;
   6841 	if (orig != data)
   6842 		WREG32(RLC_PG_CNTL, data);
   6843 }
   6844 
/* legacy (pre-header) firmware layout: jump-table size and offsets
 * in dwords within each firmware image */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP jump tables from each microengine's firmware image
 * (CE, PFP, ME, MEC, and MEC2 on Kaveri) into the RLC's CP table
 * buffer, packed back to back in LE dword order.  New-style
 * firmware carries the table offset/size in its header; legacy
 * firmware uses the fixed offsets above.  No-op if the table
 * buffer was never mapped. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri additionally has a MEC2 engine */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy big-endian firmware, fixed layout */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
   6932 
/* Enable/disable GFX power gating and the RLC auto power-gating
 * control, writing registers back only on change. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded; the read itself
		 * presumably serves as a posting/flush access — matches
		 * upstream, left as-is */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
   6962 
/**
 * cik_get_cu_active_bitmap - fetch the bitmap of active CUs for a SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index within that engine
 *
 * Selects the requested SE/SH via GRBM (under grbm_idx_mutex), reads
 * the fixed (CC_GC_*) and user (GC_USER_*) shader array config
 * registers, then restores broadcast selection.  The disable bits of
 * both registers are OR'ed together (the fixed config is first masked
 * to its upper 16 bits), shifted down and inverted, and the result is
 * limited to max_cu_per_sh bits: one set bit per active CU.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* 0xffffffff/0xffffffff selects broadcast to all SEs/SHs again */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with one bit per present CU */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* registers hold disable bits, so invert to get the active set */
	return (~tmp) & mask;
}
   6987 
   6988 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
   6989 {
   6990 	u32 i, j, k, active_cu_number = 0;
   6991 	u32 mask, counter, cu_bitmap;
   6992 	u32 tmp = 0;
   6993 
   6994 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
   6995 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
   6996 			mask = 1;
   6997 			cu_bitmap = 0;
   6998 			counter = 0;
   6999 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
   7000 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
   7001 					if (counter < 2)
   7002 						cu_bitmap |= mask;
   7003 					counter ++;
   7004 				}
   7005 				mask <<= 1;
   7006 			}
   7007 
   7008 			active_cu_number += counter;
   7009 			tmp |= (cu_bitmap << (i * 16 + j * 8));
   7010 		}
   7011 	}
   7012 
   7013 	WREG32(RLC_PG_AO_CU_MASK, tmp);
   7014 
   7015 	tmp = RREG32(RLC_MAX_PG_CU);
   7016 	tmp &= ~MAX_PU_CU_MASK;
   7017 	tmp |= MAX_PU_CU(active_cu_number);
   7018 	WREG32(RLC_MAX_PG_CU, tmp);
   7019 }
   7020 
   7021 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
   7022 				       bool enable)
   7023 {
   7024 	u32 data, orig;
   7025 
   7026 	orig = data = RREG32(RLC_PG_CNTL);
   7027 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
   7028 		data |= STATIC_PER_CU_PG_ENABLE;
   7029 	else
   7030 		data &= ~STATIC_PER_CU_PG_ENABLE;
   7031 	if (orig != data)
   7032 		WREG32(RLC_PG_CNTL, data);
   7033 }
   7034 
   7035 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
   7036 					bool enable)
   7037 {
   7038 	u32 data, orig;
   7039 
   7040 	orig = data = RREG32(RLC_PG_CNTL);
   7041 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
   7042 		data |= DYN_PER_CU_PG_ENABLE;
   7043 	else
   7044 		data &= ~DYN_PER_CU_PG_ENABLE;
   7045 	if (orig != data)
   7046 		WREG32(RLC_PG_CNTL, data);
   7047 }
   7048 
   7049 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
   7050 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
   7051 
/**
 * cik_init_gfx_cgpg - one-time RLC setup for GFX powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor (GPU address high/low and size, or
 * zeros when no cs_data is present) and the save/restore register list
 * into RLC scratch space, points the RLC at the save/restore and CP
 * table buffers, and programs the CP idle poll count plus the
 * auto-powergating delay and threshold values.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffer addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
   7100 
/**
 * cik_update_gfx_pg - enable/disable all GFX powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to disable
 *
 * Applies coarse-grain, static per-CU and dynamic per-CU GFX
 * powergating in that order; each helper additionally checks the
 * corresponding rdev->pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
   7107 
   7108 u32 cik_get_csb_size(struct radeon_device *rdev)
   7109 {
   7110 	u32 count = 0;
   7111 	const struct cs_section_def *sect = NULL;
   7112 	const struct cs_extent_def *ext = NULL;
   7113 
   7114 	if (rdev->rlc.cs_data == NULL)
   7115 		return 0;
   7116 
   7117 	/* begin clear state */
   7118 	count += 2;
   7119 	/* context control state */
   7120 	count += 3;
   7121 
   7122 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
   7123 		for (ext = sect->section; ext->extent != NULL; ++ext) {
   7124 			if (sect->id == SECT_CONTEXT)
   7125 				count += 2 + ext->reg_count;
   7126 			else
   7127 				return 0;
   7128 		}
   7129 	}
   7130 	/* pa_sc_raster_config/pa_sc_raster_config1 */
   7131 	count += 4;
   7132 	/* end clear state */
   7133 	count += 2;
   7134 	/* clear state */
   7135 	count += 2;
   7136 
   7137 	return count;
   7138 }
   7139 
   7140 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
   7141 {
   7142 	u32 count = 0, i;
   7143 	const struct cs_section_def *sect = NULL;
   7144 	const struct cs_extent_def *ext = NULL;
   7145 
   7146 	if (rdev->rlc.cs_data == NULL)
   7147 		return;
   7148 	if (buffer == NULL)
   7149 		return;
   7150 
   7151 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   7152 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   7153 
   7154 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   7155 	buffer[count++] = cpu_to_le32(0x80000000);
   7156 	buffer[count++] = cpu_to_le32(0x80000000);
   7157 
   7158 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
   7159 		for (ext = sect->section; ext->extent != NULL; ++ext) {
   7160 			if (sect->id == SECT_CONTEXT) {
   7161 				buffer[count++] =
   7162 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
   7163 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
   7164 				for (i = 0; i < ext->reg_count; i++)
   7165 					buffer[count++] = cpu_to_le32(ext->extent[i]);
   7166 			} else {
   7167 				return;
   7168 			}
   7169 		}
   7170 	}
   7171 
   7172 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   7173 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
   7174 	switch (rdev->family) {
   7175 	case CHIP_BONAIRE:
   7176 		buffer[count++] = cpu_to_le32(0x16000012);
   7177 		buffer[count++] = cpu_to_le32(0x00000000);
   7178 		break;
   7179 	case CHIP_KAVERI:
   7180 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
   7181 		buffer[count++] = cpu_to_le32(0x00000000);
   7182 		break;
   7183 	case CHIP_KABINI:
   7184 	case CHIP_MULLINS:
   7185 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
   7186 		buffer[count++] = cpu_to_le32(0x00000000);
   7187 		break;
   7188 	case CHIP_HAWAII:
   7189 		buffer[count++] = cpu_to_le32(0x3a00161a);
   7190 		buffer[count++] = cpu_to_le32(0x0000002e);
   7191 		break;
   7192 	default:
   7193 		buffer[count++] = cpu_to_le32(0x00000000);
   7194 		buffer[count++] = cpu_to_le32(0x00000000);
   7195 		break;
   7196 	}
   7197 
   7198 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   7199 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
   7200 
   7201 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
   7202 	buffer[count++] = cpu_to_le32(0);
   7203 }
   7204 
/**
 * cik_init_pg - initialize all supported powergating features
 *
 * @rdev: radeon_device pointer
 *
 * When any pg_flags are set: enables SCK slowdown on power-up and
 * power-down, performs the GFX-PG specific setup (RLC init, CP and
 * GDS powergating) when RADEON_PG_SUPPORT_GFX_PG is set, programs the
 * always-on CU mask and finally enables GFX powergating.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
   7219 
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables GFX powergating and, when RADEON_PG_SUPPORT_GFX_PG is set,
 * the CP and GDS powergating enabled by cik_init_pg().
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
   7230 
   7231 /*
   7232  * Interrupts
   7233  * Starting with r6xx, interrupts are handled via a ring buffer.
   7234  * Ring buffers are areas of GPU accessible memory that the GPU
   7235  * writes interrupt vectors into and the host reads vectors out of.
   7236  * There is a rptr (read pointer) that determines where the
   7237  * host is currently reading, and a wptr (write pointer)
   7238  * which determines where the GPU has written.  When the
   7239  * pointers are equal, the ring is idle.  When the GPU
   7240  * writes vectors to the ring buffer, it increments the
   7241  * wptr.  When there is an interrupt, the host then starts
   7242  * fetching commands and processing them until the pointers are
   7243  * equal again at which point it updates the rptr.
   7244  */
   7245 
   7246 /**
   7247  * cik_enable_interrupts - Enable the interrupt ring buffer
   7248  *
   7249  * @rdev: radeon_device pointer
   7250  *
   7251  * Enable the interrupt ring buffer (CIK).
   7252  */
   7253 static void cik_enable_interrupts(struct radeon_device *rdev)
   7254 {
   7255 	u32 ih_cntl = RREG32(IH_CNTL);
   7256 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
   7257 
   7258 	ih_cntl |= ENABLE_INTR;
   7259 	ih_rb_cntl |= IH_RB_ENABLE;
   7260 	WREG32(IH_CNTL, ih_cntl);
   7261 	WREG32(IH_RB_CNTL, ih_rb_cntl);
   7262 	rdev->ih.enabled = true;
   7263 }
   7264 
   7265 /**
   7266  * cik_disable_interrupts - Disable the interrupt ring buffer
   7267  *
   7268  * @rdev: radeon_device pointer
   7269  *
   7270  * Disable the interrupt ring buffer (CIK).
   7271  */
   7272 static void cik_disable_interrupts(struct radeon_device *rdev)
   7273 {
   7274 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
   7275 	u32 ih_cntl = RREG32(IH_CNTL);
   7276 
   7277 	ih_rb_cntl &= ~IH_RB_ENABLE;
   7278 	ih_cntl &= ~ENABLE_INTR;
   7279 	WREG32(IH_RB_CNTL, ih_rb_cntl);
   7280 	WREG32(IH_CNTL, ih_cntl);
   7281 	/* set rptr, wptr to 0 */
   7282 	WREG32(IH_RB_RPTR, 0);
   7283 	WREG32(IH_RB_WPTR, 0);
   7284 	rdev->ih.enabled = false;
   7285 	rdev->ih.rptr = 0;
   7286 }
   7287 
   7288 /**
   7289  * cik_disable_interrupt_state - Disable all interrupt sources
   7290  *
   7291  * @rdev: radeon_device pointer
   7292  *
   7293  * Clear all interrupt enable bits used by the driver (CIK).
   7294  */
   7295 static void cik_disable_interrupt_state(struct radeon_device *rdev)
   7296 {
   7297 	u32 tmp;
   7298 
   7299 	/* gfx ring */
   7300 	tmp = RREG32(CP_INT_CNTL_RING0) &
   7301 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   7302 	WREG32(CP_INT_CNTL_RING0, tmp);
   7303 	/* sdma */
   7304 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7305 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
   7306 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7307 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
   7308 	/* compute queues */
   7309 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
   7310 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
   7311 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
   7312 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
   7313 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
   7314 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
   7315 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
   7316 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
   7317 	/* grbm */
   7318 	WREG32(GRBM_INT_CNTL, 0);
   7319 	/* SRBM */
   7320 	WREG32(SRBM_INT_CNTL, 0);
   7321 	/* vline/vblank, etc. */
   7322 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
   7323 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
   7324 	if (rdev->num_crtc >= 4) {
   7325 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
   7326 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
   7327 	}
   7328 	if (rdev->num_crtc >= 6) {
   7329 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
   7330 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
   7331 	}
   7332 	/* pflip */
   7333 	if (rdev->num_crtc >= 2) {
   7334 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
   7335 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
   7336 	}
   7337 	if (rdev->num_crtc >= 4) {
   7338 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
   7339 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
   7340 	}
   7341 	if (rdev->num_crtc >= 6) {
   7342 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
   7343 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
   7344 	}
   7345 
   7346 	/* dac hotplug */
   7347 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
   7348 
   7349 	/* digital hotplug */
   7350 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7351 	WREG32(DC_HPD1_INT_CONTROL, tmp);
   7352 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7353 	WREG32(DC_HPD2_INT_CONTROL, tmp);
   7354 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7355 	WREG32(DC_HPD3_INT_CONTROL, tmp);
   7356 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7357 	WREG32(DC_HPD4_INT_CONTROL, tmp);
   7358 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7359 	WREG32(DC_HPD5_INT_CONTROL, tmp);
   7360 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   7361 	WREG32(DC_HPD6_INT_CONTROL, tmp);
   7362 
   7363 }
   7364 
   7365 /**
   7366  * cik_irq_init - init and enable the interrupt ring
   7367  *
   7368  * @rdev: radeon_device pointer
   7369  *
   7370  * Allocate a ring buffer for the interrupt controller,
   7371  * enable the RLC, disable interrupts, enable the IH
   7372  * ring buffer and enable it (CIK).
   7373  * Called at device load and reume.
   7374  * Returns 0 for success, errors for failure.
   7375  */
   7376 static int cik_irq_init(struct radeon_device *rdev)
   7377 {
   7378 	int ret = 0;
   7379 	int rb_bufsz;
   7380 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
   7381 
   7382 	/* allocate ring */
   7383 	ret = r600_ih_ring_alloc(rdev);
   7384 	if (ret)
   7385 		return ret;
   7386 
   7387 	/* disable irqs */
   7388 	cik_disable_interrupts(rdev);
   7389 
   7390 	/* init rlc */
   7391 	ret = cik_rlc_resume(rdev);
   7392 	if (ret) {
   7393 		r600_ih_ring_fini(rdev);
   7394 		return ret;
   7395 	}
   7396 
   7397 	/* setup interrupt control */
   7398 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
   7399 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
   7400 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
   7401 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
   7402 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
   7403 	 */
   7404 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
   7405 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
   7406 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
   7407 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
   7408 
   7409 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
   7410 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
   7411 
   7412 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
   7413 		      IH_WPTR_OVERFLOW_CLEAR |
   7414 		      (rb_bufsz << 1));
   7415 
   7416 	if (rdev->wb.enabled)
   7417 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
   7418 
   7419 	/* set the writeback address whether it's enabled or not */
   7420 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
   7421 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
   7422 
   7423 	WREG32(IH_RB_CNTL, ih_rb_cntl);
   7424 
   7425 	/* set rptr, wptr to 0 */
   7426 	WREG32(IH_RB_RPTR, 0);
   7427 	WREG32(IH_RB_WPTR, 0);
   7428 
   7429 	/* Default settings for IH_CNTL (disabled at first) */
   7430 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
   7431 	/* RPTR_REARM only works if msi's are enabled */
   7432 	if (rdev->msi_enabled)
   7433 		ih_cntl |= RPTR_REARM;
   7434 	WREG32(IH_CNTL, ih_cntl);
   7435 
   7436 	/* force the active interrupt state to all disabled */
   7437 	cik_disable_interrupt_state(rdev);
   7438 
   7439 	pci_set_master(rdev->pdev);
   7440 
   7441 	/* enable irqs */
   7442 	cik_enable_interrupts(rdev);
   7443 
   7444 	return ret;
   7445 }
   7446 
   7447 /**
   7448  * cik_irq_set - enable/disable interrupt sources
   7449  *
   7450  * @rdev: radeon_device pointer
   7451  *
   7452  * Enable interrupt sources on the GPU (vblanks, hpd,
   7453  * etc.) (CIK).
   7454  * Returns 0 for success, errors for failure.
   7455  */
   7456 int cik_irq_set(struct radeon_device *rdev)
   7457 {
   7458 	u32 cp_int_cntl;
   7459 	u32 cp_m1p0;
   7460 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
   7461 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
   7462 	u32 grbm_int_cntl = 0;
   7463 	u32 dma_cntl, dma_cntl1;
   7464 
   7465 	if (!rdev->irq.installed) {
   7466 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
   7467 		return -EINVAL;
   7468 	}
   7469 	/* don't enable anything if the ih is disabled */
   7470 	if (!rdev->ih.enabled) {
   7471 		cik_disable_interrupts(rdev);
   7472 		/* force the active interrupt state to all disabled */
   7473 		cik_disable_interrupt_state(rdev);
   7474 		return 0;
   7475 	}
   7476 
   7477 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
   7478 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   7479 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
   7480 
   7481 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7482 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7483 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7484 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7485 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7486 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7487 
   7488 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7489 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7490 
   7491 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7492 
   7493 	/* enable CP interrupts on all rings */
   7494 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
   7495 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
   7496 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
   7497 	}
   7498 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
   7499 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   7500 		DRM_DEBUG("si_irq_set: sw int cp1\n");
   7501 		if (ring->me == 1) {
   7502 			switch (ring->pipe) {
   7503 			case 0:
   7504 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
   7505 				break;
   7506 			default:
   7507 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
   7508 				break;
   7509 			}
   7510 		} else {
   7511 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
   7512 		}
   7513 	}
   7514 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
   7515 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   7516 		DRM_DEBUG("si_irq_set: sw int cp2\n");
   7517 		if (ring->me == 1) {
   7518 			switch (ring->pipe) {
   7519 			case 0:
   7520 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
   7521 				break;
   7522 			default:
   7523 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
   7524 				break;
   7525 			}
   7526 		} else {
   7527 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
   7528 		}
   7529 	}
   7530 
   7531 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
   7532 		DRM_DEBUG("cik_irq_set: sw int dma\n");
   7533 		dma_cntl |= TRAP_ENABLE;
   7534 	}
   7535 
   7536 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
   7537 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
   7538 		dma_cntl1 |= TRAP_ENABLE;
   7539 	}
   7540 
   7541 	if (rdev->irq.crtc_vblank_int[0] ||
   7542 	    atomic_read(&rdev->irq.pflip[0])) {
   7543 		DRM_DEBUG("cik_irq_set: vblank 0\n");
   7544 		crtc1 |= VBLANK_INTERRUPT_MASK;
   7545 	}
   7546 	if (rdev->irq.crtc_vblank_int[1] ||
   7547 	    atomic_read(&rdev->irq.pflip[1])) {
   7548 		DRM_DEBUG("cik_irq_set: vblank 1\n");
   7549 		crtc2 |= VBLANK_INTERRUPT_MASK;
   7550 	}
   7551 	if (rdev->irq.crtc_vblank_int[2] ||
   7552 	    atomic_read(&rdev->irq.pflip[2])) {
   7553 		DRM_DEBUG("cik_irq_set: vblank 2\n");
   7554 		crtc3 |= VBLANK_INTERRUPT_MASK;
   7555 	}
   7556 	if (rdev->irq.crtc_vblank_int[3] ||
   7557 	    atomic_read(&rdev->irq.pflip[3])) {
   7558 		DRM_DEBUG("cik_irq_set: vblank 3\n");
   7559 		crtc4 |= VBLANK_INTERRUPT_MASK;
   7560 	}
   7561 	if (rdev->irq.crtc_vblank_int[4] ||
   7562 	    atomic_read(&rdev->irq.pflip[4])) {
   7563 		DRM_DEBUG("cik_irq_set: vblank 4\n");
   7564 		crtc5 |= VBLANK_INTERRUPT_MASK;
   7565 	}
   7566 	if (rdev->irq.crtc_vblank_int[5] ||
   7567 	    atomic_read(&rdev->irq.pflip[5])) {
   7568 		DRM_DEBUG("cik_irq_set: vblank 5\n");
   7569 		crtc6 |= VBLANK_INTERRUPT_MASK;
   7570 	}
   7571 	if (rdev->irq.hpd[0]) {
   7572 		DRM_DEBUG("cik_irq_set: hpd 1\n");
   7573 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7574 	}
   7575 	if (rdev->irq.hpd[1]) {
   7576 		DRM_DEBUG("cik_irq_set: hpd 2\n");
   7577 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7578 	}
   7579 	if (rdev->irq.hpd[2]) {
   7580 		DRM_DEBUG("cik_irq_set: hpd 3\n");
   7581 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7582 	}
   7583 	if (rdev->irq.hpd[3]) {
   7584 		DRM_DEBUG("cik_irq_set: hpd 4\n");
   7585 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7586 	}
   7587 	if (rdev->irq.hpd[4]) {
   7588 		DRM_DEBUG("cik_irq_set: hpd 5\n");
   7589 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7590 	}
   7591 	if (rdev->irq.hpd[5]) {
   7592 		DRM_DEBUG("cik_irq_set: hpd 6\n");
   7593 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7594 	}
   7595 
   7596 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
   7597 
   7598 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
   7599 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
   7600 
   7601 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
   7602 
   7603 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
   7604 
   7605 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
   7606 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
   7607 	if (rdev->num_crtc >= 4) {
   7608 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
   7609 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
   7610 	}
   7611 	if (rdev->num_crtc >= 6) {
   7612 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
   7613 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
   7614 	}
   7615 
   7616 	if (rdev->num_crtc >= 2) {
   7617 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
   7618 		       GRPH_PFLIP_INT_MASK);
   7619 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
   7620 		       GRPH_PFLIP_INT_MASK);
   7621 	}
   7622 	if (rdev->num_crtc >= 4) {
   7623 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
   7624 		       GRPH_PFLIP_INT_MASK);
   7625 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
   7626 		       GRPH_PFLIP_INT_MASK);
   7627 	}
   7628 	if (rdev->num_crtc >= 6) {
   7629 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
   7630 		       GRPH_PFLIP_INT_MASK);
   7631 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
   7632 		       GRPH_PFLIP_INT_MASK);
   7633 	}
   7634 
   7635 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
   7636 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
   7637 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
   7638 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
   7639 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
   7640 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
   7641 
   7642 	/* posting read */
   7643 	RREG32(SRBM_STATUS);
   7644 
   7645 	return 0;
   7646 }
   7647 
   7648 /**
   7649  * cik_irq_ack - ack interrupt sources
   7650  *
   7651  * @rdev: radeon_device pointer
   7652  *
   7653  * Ack interrupt sources on the GPU (vblanks, hpd,
   7654  * etc.) (CIK).  Certain interrupts sources are sw
   7655  * generated and do not require an explicit ack.
   7656  */
   7657 static inline void cik_irq_ack(struct radeon_device *rdev)
   7658 {
   7659 	u32 tmp;
   7660 
   7661 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
   7662 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
   7663 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
   7664 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
   7665 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
   7666 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
   7667 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
   7668 
   7669 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
   7670 		EVERGREEN_CRTC0_REGISTER_OFFSET);
   7671 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
   7672 		EVERGREEN_CRTC1_REGISTER_OFFSET);
   7673 	if (rdev->num_crtc >= 4) {
   7674 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
   7675 			EVERGREEN_CRTC2_REGISTER_OFFSET);
   7676 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
   7677 			EVERGREEN_CRTC3_REGISTER_OFFSET);
   7678 	}
   7679 	if (rdev->num_crtc >= 6) {
   7680 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
   7681 			EVERGREEN_CRTC4_REGISTER_OFFSET);
   7682 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
   7683 			EVERGREEN_CRTC5_REGISTER_OFFSET);
   7684 	}
   7685 
   7686 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
   7687 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
   7688 		       GRPH_PFLIP_INT_CLEAR);
   7689 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
   7690 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
   7691 		       GRPH_PFLIP_INT_CLEAR);
   7692 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
   7693 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
   7694 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
   7695 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
   7696 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
   7697 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
   7698 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
   7699 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
   7700 
   7701 	if (rdev->num_crtc >= 4) {
   7702 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
   7703 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
   7704 			       GRPH_PFLIP_INT_CLEAR);
   7705 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
   7706 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
   7707 			       GRPH_PFLIP_INT_CLEAR);
   7708 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
   7709 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
   7710 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
   7711 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
   7712 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
   7713 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
   7714 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
   7715 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
   7716 	}
   7717 
   7718 	if (rdev->num_crtc >= 6) {
   7719 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
   7720 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
   7721 			       GRPH_PFLIP_INT_CLEAR);
   7722 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
   7723 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
   7724 			       GRPH_PFLIP_INT_CLEAR);
   7725 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
   7726 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
   7727 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
   7728 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
   7729 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
   7730 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
   7731 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
   7732 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
   7733 	}
   7734 
   7735 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
   7736 		tmp = RREG32(DC_HPD1_INT_CONTROL);
   7737 		tmp |= DC_HPDx_INT_ACK;
   7738 		WREG32(DC_HPD1_INT_CONTROL, tmp);
   7739 	}
   7740 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
   7741 		tmp = RREG32(DC_HPD2_INT_CONTROL);
   7742 		tmp |= DC_HPDx_INT_ACK;
   7743 		WREG32(DC_HPD2_INT_CONTROL, tmp);
   7744 	}
   7745 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
   7746 		tmp = RREG32(DC_HPD3_INT_CONTROL);
   7747 		tmp |= DC_HPDx_INT_ACK;
   7748 		WREG32(DC_HPD3_INT_CONTROL, tmp);
   7749 	}
   7750 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
   7751 		tmp = RREG32(DC_HPD4_INT_CONTROL);
   7752 		tmp |= DC_HPDx_INT_ACK;
   7753 		WREG32(DC_HPD4_INT_CONTROL, tmp);
   7754 	}
   7755 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
   7756 		tmp = RREG32(DC_HPD5_INT_CONTROL);
   7757 		tmp |= DC_HPDx_INT_ACK;
   7758 		WREG32(DC_HPD5_INT_CONTROL, tmp);
   7759 	}
   7760 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
   7761 		tmp = RREG32(DC_HPD6_INT_CONTROL);
   7762 		tmp |= DC_HPDx_INT_ACK;
   7763 		WREG32(DC_HPD6_INT_CONTROL, tmp);
   7764 	}
   7765 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
   7766 		tmp = RREG32(DC_HPD1_INT_CONTROL);
   7767 		tmp |= DC_HPDx_RX_INT_ACK;
   7768 		WREG32(DC_HPD1_INT_CONTROL, tmp);
   7769 	}
   7770 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
   7771 		tmp = RREG32(DC_HPD2_INT_CONTROL);
   7772 		tmp |= DC_HPDx_RX_INT_ACK;
   7773 		WREG32(DC_HPD2_INT_CONTROL, tmp);
   7774 	}
   7775 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
   7776 		tmp = RREG32(DC_HPD3_INT_CONTROL);
   7777 		tmp |= DC_HPDx_RX_INT_ACK;
   7778 		WREG32(DC_HPD3_INT_CONTROL, tmp);
   7779 	}
   7780 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
   7781 		tmp = RREG32(DC_HPD4_INT_CONTROL);
   7782 		tmp |= DC_HPDx_RX_INT_ACK;
   7783 		WREG32(DC_HPD4_INT_CONTROL, tmp);
   7784 	}
   7785 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
   7786 		tmp = RREG32(DC_HPD5_INT_CONTROL);
   7787 		tmp |= DC_HPDx_RX_INT_ACK;
   7788 		WREG32(DC_HPD5_INT_CONTROL, tmp);
   7789 	}
   7790 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
   7791 		tmp = RREG32(DC_HPD6_INT_CONTROL);
   7792 		tmp |= DC_HPDx_RX_INT_ACK;
   7793 		WREG32(DC_HPD6_INT_CONTROL, tmp);
   7794 	}
   7795 }
   7796 
   7797 /**
   7798  * cik_irq_disable - disable interrupts
   7799  *
   7800  * @rdev: radeon_device pointer
   7801  *
   7802  * Disable interrupts on the hw (CIK).
   7803  */
   7804 static void cik_irq_disable(struct radeon_device *rdev)
   7805 {
   7806 	cik_disable_interrupts(rdev);
   7807 	/* Wait and acknowledge irq */
   7808 	mdelay(1);
   7809 	cik_irq_ack(rdev);
   7810 	cik_disable_interrupt_state(rdev);
   7811 }
   7812 
   7813 /**
   7814  * cik_irq_disable - disable interrupts for suspend
   7815  *
   7816  * @rdev: radeon_device pointer
   7817  *
   7818  * Disable interrupts and stop the RLC (CIK).
   7819  * Used for suspend.
   7820  */
   7821 static void cik_irq_suspend(struct radeon_device *rdev)
   7822 {
   7823 	cik_irq_disable(rdev);
   7824 	cik_rlc_stop(rdev);
   7825 }
   7826 
   7827 /**
   7828  * cik_irq_fini - tear down interrupt support
   7829  *
   7830  * @rdev: radeon_device pointer
   7831  *
   7832  * Disable interrupts on the hw and free the IH ring
   7833  * buffer (CIK).
   7834  * Used for driver unload.
   7835  */
   7836 static void cik_irq_fini(struct radeon_device *rdev)
   7837 {
   7838 	cik_irq_suspend(rdev);
   7839 	r600_ih_ring_fini(rdev);
   7840 }
   7841 
   7842 /**
   7843  * cik_get_ih_wptr - get the IH ring buffer wptr
   7844  *
   7845  * @rdev: radeon_device pointer
   7846  *
   7847  * Get the IH ring buffer wptr from either the register
   7848  * or the writeback memory buffer (CIK).  Also check for
   7849  * ring buffer overflow and deal with it.
   7850  * Used by cik_irq_process().
   7851  * Returns the value of the wptr.
   7852  */
   7853 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
   7854 {
   7855 	u32 wptr, tmp;
   7856 
   7857 	if (rdev->wb.enabled)
   7858 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
   7859 	else
   7860 		wptr = RREG32(IH_RB_WPTR);
   7861 
   7862 	if (wptr & RB_OVERFLOW) {
   7863 		wptr &= ~RB_OVERFLOW;
   7864 		/* When a ring buffer overflow happen start parsing interrupt
   7865 		 * from the last not overwritten vector (wptr + 16). Hopefully
   7866 		 * this should allow us to catchup.
   7867 		 */
   7868 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
   7869 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
   7870 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
   7871 		tmp = RREG32(IH_RB_CNTL);
   7872 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
   7873 		WREG32(IH_RB_CNTL, tmp);
   7874 	}
   7875 	return (wptr & rdev->ih.ptr_mask);
   7876 }
   7877 
   7878 /*        CIK IV Ring
   7879  * Each IV ring entry is 128 bits:
   7880  * [7:0]    - interrupt source id
   7881  * [31:8]   - reserved
   7882  * [59:32]  - interrupt source data
   7883  * [63:60]  - reserved
   7884  * [71:64]  - RINGID
   7885  *            CP:
   7886  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
   7887  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
   7888  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
   7889  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
   7890  *            PIPE_ID - ME0 0=3D
   7891  *                    - ME1&2 compute dispatcher (4 pipes each)
   7892  *            SDMA:
   7893  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
   7894  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
   7895  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
   7896  * [79:72]  - VMID
   7897  * [95:80]  - PASID
   7898  * [127:96] - reserved
   7899  */
   7900 /**
   7901  * cik_irq_process - interrupt handler
   7902  *
   7903  * @rdev: radeon_device pointer
   7904  *
   7905  * Interrupt hander (CIK).  Walk the IH ring,
   7906  * ack interrupts and schedule work to handle
   7907  * interrupt events.
   7908  * Returns irq process return code.
   7909  */
   7910 int cik_irq_process(struct radeon_device *rdev)
   7911 {
   7912 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   7913 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   7914 	u32 wptr;
   7915 	u32 rptr;
   7916 	u32 src_id, src_data, ring_id;
   7917 	u8 me_id, pipe_id, queue_id;
   7918 	u32 ring_index;
   7919 	bool queue_hotplug = false;
   7920 	bool queue_dp = false;
   7921 	bool queue_reset = false;
   7922 	u32 addr, status, mc_client;
   7923 	bool queue_thermal = false;
   7924 
   7925 	if (!rdev->ih.enabled || rdev->shutdown)
   7926 		return IRQ_NONE;
   7927 
   7928 	wptr = cik_get_ih_wptr(rdev);
   7929 
   7930 restart_ih:
   7931 	/* is somebody else already processing irqs? */
   7932 	if (atomic_xchg(&rdev->ih.lock, 1))
   7933 		return IRQ_NONE;
   7934 
   7935 	rptr = rdev->ih.rptr;
   7936 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
   7937 
   7938 	/* Order reading of wptr vs. reading of IH ring data */
   7939 	rmb();
   7940 
   7941 	/* display interrupts */
   7942 	cik_irq_ack(rdev);
   7943 
   7944 	while (rptr != wptr) {
   7945 		/* wptr/rptr are in bytes! */
   7946 		ring_index = rptr / 4;
   7947 
   7948 		radeon_kfd_interrupt(rdev,
   7949 		    (const void *)__UNVOLATILE(&rdev->ih.ring[ring_index]));
   7950 
   7951 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
   7952 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
   7953 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
   7954 
   7955 		switch (src_id) {
   7956 		case 1: /* D1 vblank/vline */
   7957 			switch (src_data) {
   7958 			case 0: /* D1 vblank */
   7959 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
   7960 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7961 
   7962 				if (rdev->irq.crtc_vblank_int[0]) {
   7963 					drm_handle_vblank(rdev->ddev, 0);
   7964 #ifdef __NetBSD__
   7965 						spin_lock(&rdev->irq.vblank_lock);
   7966 						rdev->pm.vblank_sync = true;
   7967 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   7968 						spin_unlock(&rdev->irq.vblank_lock);
   7969 #else
   7970 					rdev->pm.vblank_sync = true;
   7971 					wake_up(&rdev->irq.vblank_queue);
   7972 #endif
   7973 				}
   7974 				if (atomic_read(&rdev->irq.pflip[0]))
   7975 					radeon_crtc_handle_vblank(rdev, 0);
   7976 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
   7977 				DRM_DEBUG("IH: D1 vblank\n");
   7978 
   7979 				break;
   7980 			case 1: /* D1 vline */
   7981 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
   7982 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7983 
   7984 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
   7985 				DRM_DEBUG("IH: D1 vline\n");
   7986 
   7987 				break;
   7988 			default:
   7989 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7990 				break;
   7991 			}
   7992 			break;
   7993 		case 2: /* D2 vblank/vline */
   7994 			switch (src_data) {
   7995 			case 0: /* D2 vblank */
   7996 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
   7997 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7998 
   7999 				if (rdev->irq.crtc_vblank_int[1]) {
   8000 					drm_handle_vblank(rdev->ddev, 1);
   8001 #ifdef __NetBSD__
   8002 						spin_lock(&rdev->irq.vblank_lock);
   8003 						rdev->pm.vblank_sync = true;
   8004 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   8005 						spin_unlock(&rdev->irq.vblank_lock);
   8006 #else
   8007 					rdev->pm.vblank_sync = true;
   8008 					wake_up(&rdev->irq.vblank_queue);
   8009 #endif
   8010 				}
   8011 				if (atomic_read(&rdev->irq.pflip[1]))
   8012 					radeon_crtc_handle_vblank(rdev, 1);
   8013 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
   8014 				DRM_DEBUG("IH: D2 vblank\n");
   8015 
   8016 				break;
   8017 			case 1: /* D2 vline */
   8018 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
   8019 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8020 
   8021 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
   8022 				DRM_DEBUG("IH: D2 vline\n");
   8023 
   8024 				break;
   8025 			default:
   8026 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8027 				break;
   8028 			}
   8029 			break;
   8030 		case 3: /* D3 vblank/vline */
   8031 			switch (src_data) {
   8032 			case 0: /* D3 vblank */
   8033 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
   8034 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8035 
   8036 				if (rdev->irq.crtc_vblank_int[2]) {
   8037 					drm_handle_vblank(rdev->ddev, 2);
   8038 #ifdef __NetBSD__
   8039 						spin_lock(&rdev->irq.vblank_lock);
   8040 						rdev->pm.vblank_sync = true;
   8041 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   8042 						spin_unlock(&rdev->irq.vblank_lock);
   8043 #else
   8044 					rdev->pm.vblank_sync = true;
   8045 					wake_up(&rdev->irq.vblank_queue);
   8046 #endif
   8047 				}
   8048 				if (atomic_read(&rdev->irq.pflip[2]))
   8049 					radeon_crtc_handle_vblank(rdev, 2);
   8050 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
   8051 				DRM_DEBUG("IH: D3 vblank\n");
   8052 
   8053 				break;
   8054 			case 1: /* D3 vline */
   8055 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
   8056 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8057 
   8058 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
   8059 				DRM_DEBUG("IH: D3 vline\n");
   8060 
   8061 				break;
   8062 			default:
   8063 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8064 				break;
   8065 			}
   8066 			break;
   8067 		case 4: /* D4 vblank/vline */
   8068 			switch (src_data) {
   8069 			case 0: /* D4 vblank */
   8070 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
   8071 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8072 
   8073 				if (rdev->irq.crtc_vblank_int[3]) {
   8074 					drm_handle_vblank(rdev->ddev, 3);
   8075 #ifdef __NetBSD__
   8076 						spin_lock(&rdev->irq.vblank_lock);
   8077 						rdev->pm.vblank_sync = true;
   8078 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   8079 						spin_unlock(&rdev->irq.vblank_lock);
   8080 #else
   8081 					rdev->pm.vblank_sync = true;
   8082 					wake_up(&rdev->irq.vblank_queue);
   8083 #endif
   8084 				}
   8085 				if (atomic_read(&rdev->irq.pflip[3]))
   8086 					radeon_crtc_handle_vblank(rdev, 3);
   8087 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
   8088 				DRM_DEBUG("IH: D4 vblank\n");
   8089 
   8090 				break;
   8091 			case 1: /* D4 vline */
   8092 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
   8093 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8094 
   8095 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
   8096 				DRM_DEBUG("IH: D4 vline\n");
   8097 
   8098 				break;
   8099 			default:
   8100 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8101 				break;
   8102 			}
   8103 			break;
   8104 		case 5: /* D5 vblank/vline */
   8105 			switch (src_data) {
   8106 			case 0: /* D5 vblank */
   8107 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
   8108 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8109 
   8110 				if (rdev->irq.crtc_vblank_int[4]) {
   8111 					drm_handle_vblank(rdev->ddev, 4);
   8112 #ifdef __NetBSD__
   8113 						spin_lock(&rdev->irq.vblank_lock);
   8114 						rdev->pm.vblank_sync = true;
   8115 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   8116 						spin_unlock(&rdev->irq.vblank_lock);
   8117 #else
   8118 					rdev->pm.vblank_sync = true;
   8119 					wake_up(&rdev->irq.vblank_queue);
   8120 #endif
   8121 				}
   8122 				if (atomic_read(&rdev->irq.pflip[4]))
   8123 					radeon_crtc_handle_vblank(rdev, 4);
   8124 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
   8125 				DRM_DEBUG("IH: D5 vblank\n");
   8126 
   8127 				break;
   8128 			case 1: /* D5 vline */
   8129 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
   8130 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8131 
   8132 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
   8133 				DRM_DEBUG("IH: D5 vline\n");
   8134 
   8135 				break;
   8136 			default:
   8137 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8138 				break;
   8139 			}
   8140 			break;
   8141 		case 6: /* D6 vblank/vline */
   8142 			switch (src_data) {
   8143 			case 0: /* D6 vblank */
   8144 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
   8145 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8146 
   8147 				if (rdev->irq.crtc_vblank_int[5]) {
   8148 					drm_handle_vblank(rdev->ddev, 5);
   8149 #ifdef __NetBSD__
   8150 						spin_lock(&rdev->irq.vblank_lock);
   8151 						rdev->pm.vblank_sync = true;
   8152 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
   8153 						spin_unlock(&rdev->irq.vblank_lock);
   8154 #else
   8155 					rdev->pm.vblank_sync = true;
   8156 					wake_up(&rdev->irq.vblank_queue);
   8157 #endif
   8158 				}
   8159 				if (atomic_read(&rdev->irq.pflip[5]))
   8160 					radeon_crtc_handle_vblank(rdev, 5);
   8161 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
   8162 				DRM_DEBUG("IH: D6 vblank\n");
   8163 
   8164 				break;
   8165 			case 1: /* D6 vline */
   8166 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
   8167 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8168 
   8169 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
   8170 				DRM_DEBUG("IH: D6 vline\n");
   8171 
   8172 				break;
   8173 			default:
   8174 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8175 				break;
   8176 			}
   8177 			break;
   8178 		case 8: /* D1 page flip */
   8179 		case 10: /* D2 page flip */
   8180 		case 12: /* D3 page flip */
   8181 		case 14: /* D4 page flip */
   8182 		case 16: /* D5 page flip */
   8183 		case 18: /* D6 page flip */
   8184 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
   8185 			if (radeon_use_pflipirq > 0)
   8186 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
   8187 			break;
   8188 		case 42: /* HPD hotplug */
   8189 			switch (src_data) {
   8190 			case 0:
   8191 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
   8192 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8193 
   8194 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
   8195 				queue_hotplug = true;
   8196 				DRM_DEBUG("IH: HPD1\n");
   8197 
   8198 				break;
   8199 			case 1:
   8200 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
   8201 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8202 
   8203 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
   8204 				queue_hotplug = true;
   8205 				DRM_DEBUG("IH: HPD2\n");
   8206 
   8207 				break;
   8208 			case 2:
   8209 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
   8210 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8211 
   8212 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
   8213 				queue_hotplug = true;
   8214 				DRM_DEBUG("IH: HPD3\n");
   8215 
   8216 				break;
   8217 			case 3:
   8218 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
   8219 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8220 
   8221 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
   8222 				queue_hotplug = true;
   8223 				DRM_DEBUG("IH: HPD4\n");
   8224 
   8225 				break;
   8226 			case 4:
   8227 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
   8228 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8229 
   8230 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
   8231 				queue_hotplug = true;
   8232 				DRM_DEBUG("IH: HPD5\n");
   8233 
   8234 				break;
   8235 			case 5:
   8236 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
   8237 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8238 
   8239 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
   8240 				queue_hotplug = true;
   8241 				DRM_DEBUG("IH: HPD6\n");
   8242 
   8243 				break;
   8244 			case 6:
   8245 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
   8246 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8247 
   8248 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
   8249 				queue_dp = true;
   8250 				DRM_DEBUG("IH: HPD_RX 1\n");
   8251 
   8252 				break;
   8253 			case 7:
   8254 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
   8255 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8256 
   8257 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
   8258 				queue_dp = true;
   8259 				DRM_DEBUG("IH: HPD_RX 2\n");
   8260 
   8261 				break;
   8262 			case 8:
   8263 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
   8264 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8265 
   8266 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
   8267 				queue_dp = true;
   8268 				DRM_DEBUG("IH: HPD_RX 3\n");
   8269 
   8270 				break;
   8271 			case 9:
   8272 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
   8273 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8274 
   8275 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
   8276 				queue_dp = true;
   8277 				DRM_DEBUG("IH: HPD_RX 4\n");
   8278 
   8279 				break;
   8280 			case 10:
   8281 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
   8282 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8283 
   8284 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
   8285 				queue_dp = true;
   8286 				DRM_DEBUG("IH: HPD_RX 5\n");
   8287 
   8288 				break;
   8289 			case 11:
   8290 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
   8291 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   8292 
   8293 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
   8294 				queue_dp = true;
   8295 				DRM_DEBUG("IH: HPD_RX 6\n");
   8296 
   8297 				break;
   8298 			default:
   8299 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8300 				break;
   8301 			}
   8302 			break;
   8303 		case 96:
   8304 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
   8305 			WREG32(SRBM_INT_ACK, 0x1);
   8306 			break;
   8307 		case 124: /* UVD */
   8308 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
   8309 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
   8310 			break;
   8311 		case 146:
   8312 		case 147:
   8313 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
   8314 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
   8315 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
   8316 			/* reset addr and status */
   8317 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
   8318 			if (addr == 0x0 && status == 0x0)
   8319 				break;
   8320 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
   8321 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
   8322 				addr);
   8323 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
   8324 				status);
   8325 			cik_vm_decode_fault(rdev, status, addr, mc_client);
   8326 			break;
   8327 		case 167: /* VCE */
   8328 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
   8329 			switch (src_data) {
   8330 			case 0:
   8331 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
   8332 				break;
   8333 			case 1:
   8334 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
   8335 				break;
   8336 			default:
   8337 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
   8338 				break;
   8339 			}
   8340 			break;
   8341 		case 176: /* GFX RB CP_INT */
   8342 		case 177: /* GFX IB CP_INT */
   8343 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
   8344 			break;
   8345 		case 181: /* CP EOP event */
   8346 			DRM_DEBUG("IH: CP EOP\n");
   8347 			/* XXX check the bitfield order! */
   8348 			me_id = (ring_id & 0x60) >> 5;
   8349 			pipe_id = (ring_id & 0x18) >> 3;
   8350 			queue_id = (ring_id & 0x7) >> 0;
   8351 			switch (me_id) {
   8352 			case 0:
   8353 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
   8354 				break;
   8355 			case 1:
   8356 			case 2:
   8357 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
   8358 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
   8359 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
   8360 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
   8361 				break;
   8362 			}
   8363 			break;
   8364 		case 184: /* CP Privileged reg access */
   8365 			DRM_ERROR("Illegal register access in command stream\n");
   8366 			/* XXX check the bitfield order! */
   8367 			me_id = (ring_id & 0x60) >> 5;
   8368 			pipe_id = (ring_id & 0x18) >> 3;
   8369 			queue_id = (ring_id & 0x7) >> 0;
   8370 			switch (me_id) {
   8371 			case 0:
   8372 				/* This results in a full GPU reset, but all we need to do is soft
   8373 				 * reset the CP for gfx
   8374 				 */
   8375 				queue_reset = true;
   8376 				break;
   8377 			case 1:
   8378 				/* XXX compute */
   8379 				queue_reset = true;
   8380 				break;
   8381 			case 2:
   8382 				/* XXX compute */
   8383 				queue_reset = true;
   8384 				break;
   8385 			}
   8386 			break;
   8387 		case 185: /* CP Privileged inst */
   8388 			DRM_ERROR("Illegal instruction in command stream\n");
   8389 			/* XXX check the bitfield order! */
   8390 			me_id = (ring_id & 0x60) >> 5;
   8391 			pipe_id = (ring_id & 0x18) >> 3;
   8392 			queue_id = (ring_id & 0x7) >> 0;
   8393 			switch (me_id) {
   8394 			case 0:
   8395 				/* This results in a full GPU reset, but all we need to do is soft
   8396 				 * reset the CP for gfx
   8397 				 */
   8398 				queue_reset = true;
   8399 				break;
   8400 			case 1:
   8401 				/* XXX compute */
   8402 				queue_reset = true;
   8403 				break;
   8404 			case 2:
   8405 				/* XXX compute */
   8406 				queue_reset = true;
   8407 				break;
   8408 			}
   8409 			break;
   8410 		case 224: /* SDMA trap event */
   8411 			/* XXX check the bitfield order! */
   8412 			me_id = (ring_id & 0x3) >> 0;
   8413 			queue_id = (ring_id & 0xc) >> 2;
   8414 			DRM_DEBUG("IH: SDMA trap\n");
   8415 			switch (me_id) {
   8416 			case 0:
   8417 				switch (queue_id) {
   8418 				case 0:
   8419 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
   8420 					break;
   8421 				case 1:
   8422 					/* XXX compute */
   8423 					break;
   8424 				case 2:
   8425 					/* XXX compute */
   8426 					break;
   8427 				}
   8428 				break;
   8429 			case 1:
   8430 				switch (queue_id) {
   8431 				case 0:
   8432 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
   8433 					break;
   8434 				case 1:
   8435 					/* XXX compute */
   8436 					break;
   8437 				case 2:
   8438 					/* XXX compute */
   8439 					break;
   8440 				}
   8441 				break;
   8442 			}
   8443 			break;
   8444 		case 230: /* thermal low to high */
   8445 			DRM_DEBUG("IH: thermal low to high\n");
   8446 			rdev->pm.dpm.thermal.high_to_low = false;
   8447 			queue_thermal = true;
   8448 			break;
   8449 		case 231: /* thermal high to low */
   8450 			DRM_DEBUG("IH: thermal high to low\n");
   8451 			rdev->pm.dpm.thermal.high_to_low = true;
   8452 			queue_thermal = true;
   8453 			break;
   8454 		case 233: /* GUI IDLE */
   8455 			DRM_DEBUG("IH: GUI idle\n");
   8456 			break;
   8457 		case 241: /* SDMA Privileged inst */
   8458 		case 247: /* SDMA Privileged inst */
   8459 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
   8460 			/* XXX check the bitfield order! */
   8461 			me_id = (ring_id & 0x3) >> 0;
   8462 			queue_id = (ring_id & 0xc) >> 2;
   8463 			switch (me_id) {
   8464 			case 0:
   8465 				switch (queue_id) {
   8466 				case 0:
   8467 					queue_reset = true;
   8468 					break;
   8469 				case 1:
   8470 					/* XXX compute */
   8471 					queue_reset = true;
   8472 					break;
   8473 				case 2:
   8474 					/* XXX compute */
   8475 					queue_reset = true;
   8476 					break;
   8477 				}
   8478 				break;
   8479 			case 1:
   8480 				switch (queue_id) {
   8481 				case 0:
   8482 					queue_reset = true;
   8483 					break;
   8484 				case 1:
   8485 					/* XXX compute */
   8486 					queue_reset = true;
   8487 					break;
   8488 				case 2:
   8489 					/* XXX compute */
   8490 					queue_reset = true;
   8491 					break;
   8492 				}
   8493 				break;
   8494 			}
   8495 			break;
   8496 		default:
   8497 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8498 			break;
   8499 		}
   8500 
   8501 		/* wptr/rptr are in bytes! */
   8502 		rptr += 16;
   8503 		rptr &= rdev->ih.ptr_mask;
   8504 		WREG32(IH_RB_RPTR, rptr);
   8505 	}
   8506 	if (queue_dp)
   8507 		schedule_work(&rdev->dp_work);
   8508 	if (queue_hotplug)
   8509 		schedule_delayed_work(&rdev->hotplug_work, 0);
   8510 	if (queue_reset) {
   8511 #ifdef __NetBSD__
   8512 		spin_lock(&rdev->fence_lock);
   8513 		rdev->needs_reset = true;
   8514 		radeon_fence_wakeup_locked(rdev);
   8515 		spin_unlock(&rdev->fence_lock);
   8516 #else
   8517 		rdev->needs_reset = true;
   8518 		wake_up_all(&rdev->fence_queue);
   8519 #endif
   8520 	}
   8521 	if (queue_thermal)
   8522 		schedule_work(&rdev->pm.dpm.thermal.work);
   8523 	rdev->ih.rptr = rptr;
   8524 	atomic_set(&rdev->ih.lock, 0);
   8525 
   8526 	/* make sure wptr hasn't changed while processing */
   8527 	wptr = cik_get_ih_wptr(rdev);
   8528 	if (wptr != rptr)
   8529 		goto restart_ih;
   8530 
   8531 	return IRQ_HANDLED;
   8532 }
   8533 
   8534 /*
   8535  * startup/shutdown callbacks
   8536  */
   8537 /**
   8538  * cik_startup - program the asic to a functional state
   8539  *
   8540  * @rdev: radeon_device pointer
   8541  *
   8542  * Programs the asic to a functional state (CIK).
   8543  * Called by cik_init() and cik_resume().
   8544  * Returns 0 for success, error for failure.
   8545  */
   8546 static int cik_startup(struct radeon_device *rdev)
   8547 {
   8548 	struct radeon_ring *ring;
   8549 	u32 nop;
   8550 	int r;
   8551 
   8552 	/* enable pcie gen2/3 link */
   8553 	cik_pcie_gen3_enable(rdev);
   8554 	/* enable aspm */
   8555 	cik_program_aspm(rdev);
   8556 
   8557 	/* scratch needs to be initialized before MC */
   8558 	r = r600_vram_scratch_init(rdev);
   8559 	if (r)
   8560 		return r;
   8561 
   8562 	cik_mc_program(rdev);
   8563 
   8564 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
   8565 		r = ci_mc_load_microcode(rdev);
   8566 		if (r) {
   8567 			DRM_ERROR("Failed to load MC firmware!\n");
   8568 			return r;
   8569 		}
   8570 	}
   8571 
   8572 	r = cik_pcie_gart_enable(rdev);
   8573 	if (r)
   8574 		return r;
   8575 	cik_gpu_init(rdev);
   8576 
   8577 	/* allocate rlc buffers */
   8578 	if (rdev->flags & RADEON_IS_IGP) {
   8579 		if (rdev->family == CHIP_KAVERI) {
   8580 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
   8581 			rdev->rlc.reg_list_size =
   8582 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
   8583 		} else {
   8584 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
   8585 			rdev->rlc.reg_list_size =
   8586 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
   8587 		}
   8588 	}
   8589 	rdev->rlc.cs_data = ci_cs_data;
   8590 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
   8591 	r = sumo_rlc_init(rdev);
   8592 	if (r) {
   8593 		DRM_ERROR("Failed to init rlc BOs!\n");
   8594 		return r;
   8595 	}
   8596 
   8597 	/* allocate wb buffer */
   8598 	r = radeon_wb_init(rdev);
   8599 	if (r)
   8600 		return r;
   8601 
   8602 	/* allocate mec buffers */
   8603 	r = cik_mec_init(rdev);
   8604 	if (r) {
   8605 		DRM_ERROR("Failed to init MEC BOs!\n");
   8606 		return r;
   8607 	}
   8608 
   8609 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
   8610 	if (r) {
   8611 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
   8612 		return r;
   8613 	}
   8614 
   8615 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
   8616 	if (r) {
   8617 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
   8618 		return r;
   8619 	}
   8620 
   8621 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
   8622 	if (r) {
   8623 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
   8624 		return r;
   8625 	}
   8626 
   8627 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
   8628 	if (r) {
   8629 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
   8630 		return r;
   8631 	}
   8632 
   8633 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
   8634 	if (r) {
   8635 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
   8636 		return r;
   8637 	}
   8638 
   8639 	r = radeon_uvd_resume(rdev);
   8640 	if (!r) {
   8641 		r = uvd_v4_2_resume(rdev);
   8642 		if (!r) {
   8643 			r = radeon_fence_driver_start_ring(rdev,
   8644 							   R600_RING_TYPE_UVD_INDEX);
   8645 			if (r)
   8646 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
   8647 		}
   8648 	}
   8649 	if (r)
   8650 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
   8651 
   8652 	r = radeon_vce_resume(rdev);
   8653 	if (!r) {
   8654 		r = vce_v2_0_resume(rdev);
   8655 		if (!r)
   8656 			r = radeon_fence_driver_start_ring(rdev,
   8657 							   TN_RING_TYPE_VCE1_INDEX);
   8658 		if (!r)
   8659 			r = radeon_fence_driver_start_ring(rdev,
   8660 							   TN_RING_TYPE_VCE2_INDEX);
   8661 	}
   8662 	if (r) {
   8663 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
   8664 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
   8665 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
   8666 	}
   8667 
   8668 	/* Enable IRQ */
   8669 	if (!rdev->irq.installed) {
   8670 		r = radeon_irq_kms_init(rdev);
   8671 		if (r)
   8672 			return r;
   8673 	}
   8674 
   8675 	r = cik_irq_init(rdev);
   8676 	if (r) {
   8677 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
   8678 		radeon_irq_kms_fini(rdev);
   8679 		return r;
   8680 	}
   8681 	cik_irq_set(rdev);
   8682 
   8683 	if (rdev->family == CHIP_HAWAII) {
   8684 		if (rdev->new_fw)
   8685 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
   8686 		else
   8687 			nop = RADEON_CP_PACKET2;
   8688 	} else {
   8689 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
   8690 	}
   8691 
   8692 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   8693 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
   8694 			     nop);
   8695 	if (r)
   8696 		return r;
   8697 
   8698 	/* set up the compute queues */
   8699 	/* type-2 packets are deprecated on MEC, use type-3 instead */
   8700 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   8701 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
   8702 			     nop);
   8703 	if (r)
   8704 		return r;
   8705 	ring->me = 1; /* first MEC */
   8706 	ring->pipe = 0; /* first pipe */
   8707 	ring->queue = 0; /* first queue */
   8708 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
   8709 
   8710 	/* type-2 packets are deprecated on MEC, use type-3 instead */
   8711 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   8712 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
   8713 			     nop);
   8714 	if (r)
   8715 		return r;
   8716 	/* dGPU only have 1 MEC */
   8717 	ring->me = 1; /* first MEC */
   8718 	ring->pipe = 0; /* first pipe */
   8719 	ring->queue = 1; /* second queue */
   8720 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
   8721 
   8722 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
   8723 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
   8724 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
   8725 	if (r)
   8726 		return r;
   8727 
   8728 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
   8729 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
   8730 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
   8731 	if (r)
   8732 		return r;
   8733 
   8734 	r = cik_cp_resume(rdev);
   8735 	if (r)
   8736 		return r;
   8737 
   8738 	r = cik_sdma_resume(rdev);
   8739 	if (r)
   8740 		return r;
   8741 
   8742 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
   8743 	if (ring->ring_size) {
   8744 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
   8745 				     RADEON_CP_PACKET2);
   8746 		if (!r)
   8747 			r = uvd_v1_0_init(rdev);
   8748 		if (r)
   8749 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
   8750 	}
   8751 
   8752 	r = -ENOENT;
   8753 
   8754 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
   8755 	if (ring->ring_size)
   8756 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
   8757 				     VCE_CMD_NO_OP);
   8758 
   8759 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
   8760 	if (ring->ring_size)
   8761 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
   8762 				     VCE_CMD_NO_OP);
   8763 
   8764 	if (!r)
   8765 		r = vce_v1_0_init(rdev);
   8766 	else if (r != -ENOENT)
   8767 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
   8768 
   8769 	r = radeon_ib_pool_init(rdev);
   8770 	if (r) {
   8771 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
   8772 		return r;
   8773 	}
   8774 
   8775 	r = radeon_vm_manager_init(rdev);
   8776 	if (r) {
   8777 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
   8778 		return r;
   8779 	}
   8780 
   8781 	r = radeon_audio_init(rdev);
   8782 	if (r)
   8783 		return r;
   8784 
   8785 	r = radeon_kfd_resume(rdev);
   8786 	if (r)
   8787 		return r;
   8788 
   8789 	return 0;
   8790 }
   8791 
   8792 /**
   8793  * cik_resume - resume the asic to a functional state
   8794  *
   8795  * @rdev: radeon_device pointer
   8796  *
   8797  * Programs the asic to a functional state (CIK).
   8798  * Called at resume.
   8799  * Returns 0 for success, error for failure.
   8800  */
   8801 int cik_resume(struct radeon_device *rdev)
   8802 {
   8803 	int r;
   8804 
   8805 	/* post card */
   8806 	atom_asic_init(rdev->mode_info.atom_context);
   8807 
   8808 	/* init golden registers */
   8809 	cik_init_golden_registers(rdev);
   8810 
   8811 	if (rdev->pm.pm_method == PM_METHOD_DPM)
   8812 		radeon_pm_resume(rdev);
   8813 
   8814 	rdev->accel_working = true;
   8815 	r = cik_startup(rdev);
   8816 	if (r) {
   8817 		DRM_ERROR("cik startup failed on resume\n");
   8818 		rdev->accel_working = false;
   8819 		return r;
   8820 	}
   8821 
   8822 	return r;
   8823 
   8824 }
   8825 
   8826 /**
   8827  * cik_suspend - suspend the asic
   8828  *
   8829  * @rdev: radeon_device pointer
   8830  *
   8831  * Bring the chip into a state suitable for suspend (CIK).
   8832  * Called at suspend.
   8833  * Returns 0 for success.
   8834  */
   8835 int cik_suspend(struct radeon_device *rdev)
   8836 {
   8837 	radeon_kfd_suspend(rdev);
   8838 	radeon_pm_suspend(rdev);
   8839 	radeon_audio_fini(rdev);
   8840 	radeon_vm_manager_fini(rdev);
   8841 	cik_cp_enable(rdev, false);
   8842 	cik_sdma_enable(rdev, false);
   8843 	uvd_v1_0_fini(rdev);
   8844 	radeon_uvd_suspend(rdev);
   8845 	radeon_vce_suspend(rdev);
   8846 	cik_fini_pg(rdev);
   8847 	cik_fini_cg(rdev);
   8848 	cik_irq_suspend(rdev);
   8849 	radeon_wb_disable(rdev);
   8850 	cik_pcie_gart_disable(rdev);
   8851 	return 0;
   8852 }
   8853 
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message text says "cayman"; wording inherited
		 * from the NI code path, but this is the CIK init routine. */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* Load the microcode images if not already cached.
	 * IGPs have no separate MC firmware, hence the split check. */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* GFX ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* compute rings; each is allocated a doorbell */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	/* SDMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* UVD/VCE rings are only set up when their blocks initialize */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	r = radeon_vce_init(rdev);
	if (!r) {
		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);

		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	/* interrupt handler ring */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* startup failure is not fatal for modesetting: tear down
		 * acceleration state and continue without it */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 * NOTE(review): error text says "NI+"; wording inherited from ni.c.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
   9024 
   9025 /**
   9026  * cik_fini - asic specific driver and hw fini
   9027  *
   9028  * @rdev: radeon_device pointer
   9029  *
   9030  * Tear down the asic specific driver variables and program the hw
   9031  * to an idle state (CIK).
   9032  * Called at driver unload.
   9033  */
   9034 void cik_fini(struct radeon_device *rdev)
   9035 {
   9036 	radeon_pm_fini(rdev);
   9037 	cik_cp_fini(rdev);
   9038 	cik_sdma_fini(rdev);
   9039 	cik_fini_pg(rdev);
   9040 	cik_fini_cg(rdev);
   9041 	cik_irq_fini(rdev);
   9042 	sumo_rlc_fini(rdev);
   9043 	cik_mec_fini(rdev);
   9044 	radeon_wb_fini(rdev);
   9045 	radeon_vm_manager_fini(rdev);
   9046 	radeon_ib_pool_fini(rdev);
   9047 	radeon_irq_kms_fini(rdev);
   9048 	uvd_v1_0_fini(rdev);
   9049 	radeon_uvd_fini(rdev);
   9050 	radeon_vce_fini(rdev);
   9051 	cik_pcie_gart_fini(rdev);
   9052 	r600_vram_scratch_fini(rdev);
   9053 	radeon_gem_fini(rdev);
   9054 	radeon_fence_driver_fini(rdev);
   9055 	radeon_bo_fini(rdev);
   9056 	radeon_atombios_fini(rdev);
   9057 	kfree(rdev->bios);
   9058 	rdev->bios = NULL;
   9059 }
   9060 
   9061 void dce8_program_fmt(struct drm_encoder *encoder)
   9062 {
   9063 	struct drm_device *dev = encoder->dev;
   9064 	struct radeon_device *rdev = dev->dev_private;
   9065 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
   9066 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
   9067 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
   9068 	int bpc = 0;
   9069 	u32 tmp = 0;
   9070 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
   9071 
   9072 	if (connector) {
   9073 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
   9074 		bpc = radeon_get_monitor_bpc(connector);
   9075 		dither = radeon_connector->dither;
   9076 	}
   9077 
   9078 	/* LVDS/eDP FMT is set up by atom */
   9079 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
   9080 		return;
   9081 
   9082 	/* not needed for analog */
   9083 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
   9084 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
   9085 		return;
   9086 
   9087 	if (bpc == 0)
   9088 		return;
   9089 
   9090 	switch (bpc) {
   9091 	case 6:
   9092 		if (dither == RADEON_FMT_DITHER_ENABLE)
   9093 			/* XXX sort out optimal dither settings */
   9094 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   9095 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
   9096 		else
   9097 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
   9098 		break;
   9099 	case 8:
   9100 		if (dither == RADEON_FMT_DITHER_ENABLE)
   9101 			/* XXX sort out optimal dither settings */
   9102 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   9103 				FMT_RGB_RANDOM_ENABLE |
   9104 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
   9105 		else
   9106 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
   9107 		break;
   9108 	case 10:
   9109 		if (dither == RADEON_FMT_DITHER_ENABLE)
   9110 			/* XXX sort out optimal dither settings */
   9111 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   9112 				FMT_RGB_RANDOM_ENABLE |
   9113 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
   9114 		else
   9115 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
   9116 		break;
   9117 	default:
   9118 		/* not needed */
   9119 		break;
   9120 	}
   9121 
   9122 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
   9123 }
   9124 
   9125 /* display watermark setup */
   9126 /**
   9127  * dce8_line_buffer_adjust - Set up the line buffer
   9128  *
   9129  * @rdev: radeon_device pointer
   9130  * @radeon_crtc: the selected display controller
   9131  * @mode: the current display mode on the selected display
   9132  * controller
   9133  *
   9134  * Setup up the line buffer allocation for
   9135  * the selected display controller (CIK).
   9136  * Returns the line buffer size in pixels.
   9137  */
   9138 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
   9139 				   struct radeon_crtc *radeon_crtc,
   9140 				   struct drm_display_mode *mode)
   9141 {
   9142 	u32 tmp, buffer_alloc, i;
   9143 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
   9144 	/*
   9145 	 * Line Buffer Setup
   9146 	 * There are 6 line buffers, one for each display controllers.
   9147 	 * There are 3 partitions per LB. Select the number of partitions
   9148 	 * to enable based on the display width.  For display widths larger
   9149 	 * than 4096, you need use to use 2 display controllers and combine
   9150 	 * them using the stereo blender.
   9151 	 */
   9152 	if (radeon_crtc->base.enabled && mode) {
   9153 		if (mode->crtc_hdisplay < 1920) {
   9154 			tmp = 1;
   9155 			buffer_alloc = 2;
   9156 		} else if (mode->crtc_hdisplay < 2560) {
   9157 			tmp = 2;
   9158 			buffer_alloc = 2;
   9159 		} else if (mode->crtc_hdisplay < 4096) {
   9160 			tmp = 0;
   9161 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
   9162 		} else {
   9163 			DRM_DEBUG_KMS("Mode too big for LB!\n");
   9164 			tmp = 0;
   9165 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
   9166 		}
   9167 	} else {
   9168 		tmp = 1;
   9169 		buffer_alloc = 0;
   9170 	}
   9171 
   9172 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
   9173 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
   9174 
   9175 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
   9176 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
   9177 	for (i = 0; i < rdev->usec_timeout; i++) {
   9178 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
   9179 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
   9180 			break;
   9181 		udelay(1);
   9182 	}
   9183 
   9184 	if (radeon_crtc->base.enabled && mode) {
   9185 		switch (tmp) {
   9186 		case 0:
   9187 		default:
   9188 			return 4096 * 2;
   9189 		case 1:
   9190 			return 1920 * 2;
   9191 		case 2:
   9192 			return 2560 * 2;
   9193 		}
   9194 	}
   9195 
   9196 	/* controller not enabled, so no lb used */
   9197 	return 0;
   9198 }
   9199 
   9200 /**
   9201  * cik_get_number_of_dram_channels - get the number of dram channels
   9202  *
   9203  * @rdev: radeon_device pointer
   9204  *
   9205  * Look up the number of video ram channels (CIK).
   9206  * Used for display watermark bandwidth calculations
   9207  * Returns the number of dram channels
   9208  */
   9209 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
   9210 {
   9211 	u32 tmp = RREG32(MC_SHARED_CHMAP);
   9212 
   9213 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
   9214 	case 0:
   9215 	default:
   9216 		return 1;
   9217 	case 1:
   9218 		return 2;
   9219 	case 2:
   9220 		return 4;
   9221 	case 3:
   9222 		return 8;
   9223 	case 4:
   9224 		return 3;
   9225 	case 5:
   9226 		return 6;
   9227 	case 6:
   9228 		return 10;
   9229 	case 7:
   9230 		return 12;
   9231 	case 8:
   9232 		return 16;
   9233 	}
   9234 }
   9235 
/* mode/clock inputs for the DCE8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
   9251 
   9252 /**
   9253  * dce8_dram_bandwidth - get the dram bandwidth
   9254  *
   9255  * @wm: watermark calculation data
   9256  *
   9257  * Calculate the raw dram bandwidth (CIK).
   9258  * Used for display watermark bandwidth calculations
   9259  * Returns the dram bandwidth in MBytes/s
   9260  */
   9261 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
   9262 {
   9263 	/* Calculate raw DRAM Bandwidth */
   9264 	fixed20_12 dram_efficiency; /* 0.7 */
   9265 	fixed20_12 yclk, dram_channels, bandwidth;
   9266 	fixed20_12 a;
   9267 
   9268 	a.full = dfixed_const(1000);
   9269 	yclk.full = dfixed_const(wm->yclk);
   9270 	yclk.full = dfixed_div(yclk, a);
   9271 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
   9272 	a.full = dfixed_const(10);
   9273 	dram_efficiency.full = dfixed_const(7);
   9274 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
   9275 	bandwidth.full = dfixed_mul(dram_channels, yclk);
   9276 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
   9277 
   9278 	return dfixed_trunc(bandwidth);
   9279 }
   9280 
   9281 /**
   9282  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
   9283  *
   9284  * @wm: watermark calculation data
   9285  *
   9286  * Calculate the dram bandwidth used for display (CIK).
   9287  * Used for display watermark bandwidth calculations
   9288  * Returns the dram bandwidth for display in MBytes/s
   9289  */
   9290 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
   9291 {
   9292 	/* Calculate DRAM Bandwidth and the part allocated to display. */
   9293 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
   9294 	fixed20_12 yclk, dram_channels, bandwidth;
   9295 	fixed20_12 a;
   9296 
   9297 	a.full = dfixed_const(1000);
   9298 	yclk.full = dfixed_const(wm->yclk);
   9299 	yclk.full = dfixed_div(yclk, a);
   9300 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
   9301 	a.full = dfixed_const(10);
   9302 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
   9303 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
   9304 	bandwidth.full = dfixed_mul(dram_channels, yclk);
   9305 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
   9306 
   9307 	return dfixed_trunc(bandwidth);
   9308 }
   9309 
   9310 /**
   9311  * dce8_data_return_bandwidth - get the data return bandwidth
   9312  *
   9313  * @wm: watermark calculation data
   9314  *
   9315  * Calculate the data return bandwidth used for display (CIK).
   9316  * Used for display watermark bandwidth calculations
   9317  * Returns the data return bandwidth in MBytes/s
   9318  */
   9319 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
   9320 {
   9321 	/* Calculate the display Data return Bandwidth */
   9322 	fixed20_12 return_efficiency; /* 0.8 */
   9323 	fixed20_12 sclk, bandwidth;
   9324 	fixed20_12 a;
   9325 
   9326 	a.full = dfixed_const(1000);
   9327 	sclk.full = dfixed_const(wm->sclk);
   9328 	sclk.full = dfixed_div(sclk, a);
   9329 	a.full = dfixed_const(10);
   9330 	return_efficiency.full = dfixed_const(8);
   9331 	return_efficiency.full = dfixed_div(return_efficiency, a);
   9332 	a.full = dfixed_const(32);
   9333 	bandwidth.full = dfixed_mul(a, sclk);
   9334 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
   9335 
   9336 	return dfixed_trunc(bandwidth);
   9337 }
   9338 
   9339 /**
   9340  * dce8_dmif_request_bandwidth - get the dmif bandwidth
   9341  *
   9342  * @wm: watermark calculation data
   9343  *
   9344  * Calculate the dmif bandwidth used for display (CIK).
   9345  * Used for display watermark bandwidth calculations
   9346  * Returns the dmif bandwidth in MBytes/s
   9347  */
   9348 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
   9349 {
   9350 	/* Calculate the DMIF Request Bandwidth */
   9351 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
   9352 	fixed20_12 disp_clk, bandwidth;
   9353 	fixed20_12 a, b;
   9354 
   9355 	a.full = dfixed_const(1000);
   9356 	disp_clk.full = dfixed_const(wm->disp_clk);
   9357 	disp_clk.full = dfixed_div(disp_clk, a);
   9358 	a.full = dfixed_const(32);
   9359 	b.full = dfixed_mul(a, disp_clk);
   9360 
   9361 	a.full = dfixed_const(10);
   9362 	disp_clk_request_efficiency.full = dfixed_const(8);
   9363 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
   9364 
   9365 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
   9366 
   9367 	return dfixed_trunc(bandwidth);
   9368 }
   9369 
   9370 /**
   9371  * dce8_available_bandwidth - get the min available bandwidth
   9372  *
   9373  * @wm: watermark calculation data
   9374  *
   9375  * Calculate the min available bandwidth used for display (CIK).
   9376  * Used for display watermark bandwidth calculations
   9377  * Returns the min available bandwidth in MBytes/s
   9378  */
   9379 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
   9380 {
   9381 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
   9382 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
   9383 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
   9384 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
   9385 
   9386 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
   9387 }
   9388 
   9389 /**
   9390  * dce8_average_bandwidth - get the average available bandwidth
   9391  *
   9392  * @wm: watermark calculation data
   9393  *
   9394  * Calculate the average available bandwidth used for display (CIK).
   9395  * Used for display watermark bandwidth calculations
   9396  * Returns the average available bandwidth in MBytes/s
   9397  */
   9398 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
   9399 {
   9400 	/* Calculate the display mode Average Bandwidth
   9401 	 * DisplayMode should contain the source and destination dimensions,
   9402 	 * timing, etc.
   9403 	 */
   9404 	fixed20_12 bpp;
   9405 	fixed20_12 line_time;
   9406 	fixed20_12 src_width;
   9407 	fixed20_12 bandwidth;
   9408 	fixed20_12 a;
   9409 
   9410 	a.full = dfixed_const(1000);
   9411 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
   9412 	line_time.full = dfixed_div(line_time, a);
   9413 	bpp.full = dfixed_const(wm->bytes_per_pixel);
   9414 	src_width.full = dfixed_const(wm->src_width);
   9415 	bandwidth.full = dfixed_mul(src_width, bpp);
   9416 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
   9417 	bandwidth.full = dfixed_div(bandwidth, line_time);
   9418 
   9419 	return dfixed_trunc(bandwidth);
   9420 }
   9421 
   9422 /**
   9423  * dce8_latency_watermark - get the latency watermark
   9424  *
   9425  * @wm: watermark calculation data
   9426  *
   9427  * Calculate the latency watermark (CIK).
   9428  * Used for display watermark bandwidth calculations
   9429  * Returns the latency watermark in ns
   9430  */
   9431 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
   9432 {
   9433 	/* First calculate the latency in ns */
   9434 	u32 mc_latency = 2000; /* 2000 ns. */
   9435 	u32 available_bandwidth = dce8_available_bandwidth(wm);
   9436 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
   9437 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
   9438 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
   9439 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
   9440 		(wm->num_heads * cursor_line_pair_return_time);
   9441 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
   9442 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
   9443 	u32 tmp, dmif_size = 12288;
   9444 	fixed20_12 a, b, c;
   9445 
   9446 	if (wm->num_heads == 0)
   9447 		return 0;
   9448 
   9449 	a.full = dfixed_const(2);
   9450 	b.full = dfixed_const(1);
   9451 	if ((wm->vsc.full > a.full) ||
   9452 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
   9453 	    (wm->vtaps >= 5) ||
   9454 	    ((wm->vsc.full >= a.full) && wm->interlaced))
   9455 		max_src_lines_per_dst_line = 4;
   9456 	else
   9457 		max_src_lines_per_dst_line = 2;
   9458 
   9459 	a.full = dfixed_const(available_bandwidth);
   9460 	b.full = dfixed_const(wm->num_heads);
   9461 	a.full = dfixed_div(a, b);
   9462 
   9463 	b.full = dfixed_const(mc_latency + 512);
   9464 	c.full = dfixed_const(wm->disp_clk);
   9465 	b.full = dfixed_div(b, c);
   9466 
   9467 	c.full = dfixed_const(dmif_size);
   9468 	b.full = dfixed_div(c, b);
   9469 
   9470 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
   9471 
   9472 	b.full = dfixed_const(1000);
   9473 	c.full = dfixed_const(wm->disp_clk);
   9474 	b.full = dfixed_div(c, b);
   9475 	c.full = dfixed_const(wm->bytes_per_pixel);
   9476 	b.full = dfixed_mul(b, c);
   9477 
   9478 	lb_fill_bw = min(tmp, dfixed_trunc(b));
   9479 
   9480 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
   9481 	b.full = dfixed_const(1000);
   9482 	c.full = dfixed_const(lb_fill_bw);
   9483 	b.full = dfixed_div(c, b);
   9484 	a.full = dfixed_div(a, b);
   9485 	line_fill_time = dfixed_trunc(a);
   9486 
   9487 	if (line_fill_time < wm->active_time)
   9488 		return latency;
   9489 	else
   9490 		return latency + (line_fill_time - wm->active_time);
   9491 
   9492 }
   9493 
   9494 /**
   9495  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
   9496  * average and available dram bandwidth
   9497  *
   9498  * @wm: watermark calculation data
   9499  *
   9500  * Check if the display average bandwidth fits in the display
   9501  * dram bandwidth (CIK).
   9502  * Used for display watermark bandwidth calculations
   9503  * Returns true if the display fits, false if not.
   9504  */
   9505 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
   9506 {
   9507 	if (dce8_average_bandwidth(wm) <=
   9508 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
   9509 		return true;
   9510 	else
   9511 		return false;
   9512 }
   9513 
   9514 /**
   9515  * dce8_average_bandwidth_vs_available_bandwidth - check
   9516  * average and available bandwidth
   9517  *
   9518  * @wm: watermark calculation data
   9519  *
   9520  * Check if the display average bandwidth fits in the display
   9521  * available bandwidth (CIK).
   9522  * Used for display watermark bandwidth calculations
   9523  * Returns true if the display fits, false if not.
   9524  */
   9525 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
   9526 {
   9527 	if (dce8_average_bandwidth(wm) <=
   9528 	    (dce8_available_bandwidth(wm) / wm->num_heads))
   9529 		return true;
   9530 	else
   9531 		return false;
   9532 }
   9533 
   9534 /**
   9535  * dce8_check_latency_hiding - check latency hiding
   9536  *
   9537  * @wm: watermark calculation data
   9538  *
   9539  * Check latency hiding (CIK).
   9540  * Used for display watermark bandwidth calculations
   9541  * Returns true if the display fits, false if not.
   9542  */
   9543 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
   9544 {
   9545 	u32 lb_partitions = wm->lb_size / wm->src_width;
   9546 	u32 line_time = wm->active_time + wm->blank_time;
   9547 	u32 latency_tolerant_lines;
   9548 	u32 latency_hiding;
   9549 	fixed20_12 a;
   9550 
   9551 	a.full = dfixed_const(1);
   9552 	if (wm->vsc.full > a.full)
   9553 		latency_tolerant_lines = 1;
   9554 	else {
   9555 		if (lb_partitions <= (wm->vtaps + 1))
   9556 			latency_tolerant_lines = 1;
   9557 		else
   9558 			latency_tolerant_lines = 2;
   9559 	}
   9560 
   9561 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
   9562 
   9563 	if (dce8_latency_watermark(wm) <= latency_hiding)
   9564 		return true;
   9565 	else
   9566 		return false;
   9567 }
   9568 
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line_time is clamped to 65535 to fit the register field
		 * written via LATENCY_HIGH_WATERMARK() below */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		/* with DPM enabled use the worst-case (non-lowered) memory and
		 * engine clocks; otherwise fall back to the current clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		/* mode geometry and scaling parameters feeding the
		 * watermark math */
		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		/* NOTE(review): this branch only logs; the actual priority
		 * forcing appears to be handled elsewhere — confirm */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		/* same computation as above but with the lowered DPM clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	/* the mask register selects which watermark set the latency
	 * register write lands in; original selection is restored below */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
   9704 
   9705 /**
   9706  * dce8_bandwidth_update - program display watermarks
   9707  *
   9708  * @rdev: radeon_device pointer
   9709  *
   9710  * Calculate and program the display watermarks and line
   9711  * buffer allocation (CIK).
   9712  */
   9713 void dce8_bandwidth_update(struct radeon_device *rdev)
   9714 {
   9715 	struct drm_display_mode *mode = NULL;
   9716 	u32 num_heads = 0, lb_size;
   9717 	int i;
   9718 
   9719 	if (!rdev->mode_info.mode_config_initialized)
   9720 		return;
   9721 
   9722 	radeon_update_display_priority(rdev);
   9723 
   9724 	for (i = 0; i < rdev->num_crtc; i++) {
   9725 		if (rdev->mode_info.crtcs[i]->base.enabled)
   9726 			num_heads++;
   9727 	}
   9728 	for (i = 0; i < rdev->num_crtc; i++) {
   9729 		mode = &rdev->mode_info.crtcs[i]->base.mode;
   9730 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
   9731 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
   9732 	}
   9733 }
   9734 
   9735 /**
   9736  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
   9737  *
   9738  * @rdev: radeon_device pointer
   9739  *
   9740  * Fetches a GPU clock counter snapshot (SI).
   9741  * Returns the 64 bit clock counter snapshot.
   9742  */
   9743 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
   9744 {
   9745 	uint64_t clock;
   9746 
   9747 	mutex_lock(&rdev->gpu_clock_mutex);
   9748 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
   9749 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
   9750 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
   9751 	mutex_unlock(&rdev->gpu_clock_mutex);
   9752 	return clock;
   9753 }
   9754 
   9755 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
   9756                               u32 cntl_reg, u32 status_reg)
   9757 {
   9758 	int r, i;
   9759 	struct atom_clock_dividers dividers;
   9760 	uint32_t tmp;
   9761 
   9762 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
   9763 					   clock, false, &dividers);
   9764 	if (r)
   9765 		return r;
   9766 
   9767 	tmp = RREG32_SMC(cntl_reg);
   9768 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
   9769 	tmp |= dividers.post_divider;
   9770 	WREG32_SMC(cntl_reg, tmp);
   9771 
   9772 	for (i = 0; i < 100; i++) {
   9773 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
   9774 			break;
   9775 		mdelay(10);
   9776 	}
   9777 	if (i == 100)
   9778 		return -ETIMEDOUT;
   9779 
   9780 	return 0;
   9781 }
   9782 
   9783 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
   9784 {
   9785 	int r = 0;
   9786 
   9787 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
   9788 	if (r)
   9789 		return r;
   9790 
   9791 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
   9792 	return r;
   9793 }
   9794 
   9795 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
   9796 {
   9797 	int r, i;
   9798 	struct atom_clock_dividers dividers;
   9799 	u32 tmp;
   9800 
   9801 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
   9802 					   ecclk, false, &dividers);
   9803 	if (r)
   9804 		return r;
   9805 
   9806 	for (i = 0; i < 100; i++) {
   9807 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
   9808 			break;
   9809 		mdelay(10);
   9810 	}
   9811 	if (i == 100)
   9812 		return -ETIMEDOUT;
   9813 
   9814 	tmp = RREG32_SMC(CG_ECLK_CNTL);
   9815 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
   9816 	tmp |= dividers.post_divider;
   9817 	WREG32_SMC(CG_ECLK_CNTL, tmp);
   9818 
   9819 	for (i = 0; i < 100; i++) {
   9820 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
   9821 			break;
   9822 		mdelay(10);
   9823 	}
   9824 	if (i == 100)
   9825 		return -ETIMEDOUT;
   9826 
   9827 	return 0;
   9828 }
   9829 
/*
 * cik_pcie_gen3_enable - attempt to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * Retrains the PCIE link to gen2/gen3 rates when both the root port
 * and the GPU support them.  Disabled via radeon.pcie_gen2=0 and
 * stubbed out entirely on NetBSD (no PCIE config helpers there yet).
 * NOTE(review): the gen3 path below performs a link equalization
 * retry sequence; statement order is significant.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
#ifndef __NetBSD__		/* XXX radeon pcie */
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to train against on the root bus */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter radeon.pcie_gen2=0 disables retraining */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIE link */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do unless gen2 (5.0GT/s) or gen3 (8.0GT/s) is possible */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 1 = gen2, 2 = gen3 (per the checks below) */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIE capability offsets on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the original LNKCTL of both ends, then set
			 * hardware autonomous width disable (HAWD) */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate the link back up to the detected width
			 * if it trained narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries; stop early once the
			 * GPU reports a transaction pending */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and ask the LC to redo
				 * equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved LNKCTL2 fields (bit 4 and
				 * bits 9-11) on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the GPU's target link speed in LNKCTL2 bits 3:0 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the software-initiated speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
#endif
}
   9991 
/*
 * cik_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link-control registers for the L0s/L1 low power
 * states and, when CLKREQ# support is detected, reroutes several
 * internal clocks.  Disabled via radeon.aspm=0 and skipped for IGPs
 * and non-PCIE parts.  The disable_* locals are compile-time policy
 * knobs, all currently false.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter radeon.aspm=0 disables all of this */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value with 0x24 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity setting; the write happens in
	 * one of the branches below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs of both PIF blocks to power down
			 * while the link is in L1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# detection needs PCI config access to the
			 * root port; stubbed out on NetBSD, so
			 * clk_req_support stays false there */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
#ifndef __NetBSD__		/* XXX radeon pcie */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;
#endif

				clk_req_support = false;
#ifndef __NetBSD__		/* XXX radeon pcie */
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* clock power management available: allow
				 * power-down in L1/L23 and move the thermal,
				 * deep-sleep and bypass clocks off the BIF
				 * reference clock */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write the setting built above as-is */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep for the PCIE block */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* turn L0s back off when the received N_FTS field is
		 * saturated and both directions report lane reversal —
		 * NOTE(review): presumably a hardware erratum; confirm
		 * against upstream */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
   10144