/* drivers/gpu/drm/radeon/cik.c */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/* Default SH_MEM_CONFIG programming: unaligned access mode. */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
40
41 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
50
51 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
52 MODULE_FIRMWARE("radeon/bonaire_me.bin");
53 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
55 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
57 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
58 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
60
61 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
70
71 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
72 MODULE_FIRMWARE("radeon/hawaii_me.bin");
73 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
75 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
77 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
78 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
80
81 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
87
88 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
89 MODULE_FIRMWARE("radeon/kaveri_me.bin");
90 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
92 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
93 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
94 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
95
96 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
97 MODULE_FIRMWARE("radeon/KABINI_me.bin");
98 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
99 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
100 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
101 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
102
103 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
104 MODULE_FIRMWARE("radeon/kabini_me.bin");
105 MODULE_FIRMWARE("radeon/kabini_ce.bin");
106 MODULE_FIRMWARE("radeon/kabini_mec.bin");
107 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
108 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
109
110 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
116
117 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
118 MODULE_FIRMWARE("radeon/mullins_me.bin");
119 MODULE_FIRMWARE("radeon/mullins_ce.bin");
120 MODULE_FIRMWARE("radeon/mullins_mec.bin");
121 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
122 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
123
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
129 extern void sumo_rlc_fini(struct radeon_device *rdev);
130 extern int sumo_rlc_init(struct radeon_device *rdev);
131 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
132 extern void si_rlc_reset(struct radeon_device *rdev);
133 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
134 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
135 extern int cik_sdma_resume(struct radeon_device *rdev);
136 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
137 extern void cik_sdma_fini(struct radeon_device *rdev);
138 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
139 static void cik_rlc_stop(struct radeon_device *rdev);
140 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
141 static void cik_program_aspm(struct radeon_device *rdev);
142 static void cik_init_pg(struct radeon_device *rdev);
143 static void cik_init_cg(struct radeon_device *rdev);
144 static void cik_fini_pg(struct radeon_device *rdev);
145 static void cik_fini_cg(struct radeon_device *rdev);
146 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
147                                           bool enable);
148
149 /**
150  * cik_get_allowed_info_register - fetch the register for the info ioctl
151  *
152  * @rdev: radeon_device pointer
153  * @reg: register offset in bytes
154  * @val: register value
155  *
156  * Returns 0 for success or -EINVAL for an invalid register
157  *
158  */
159 int cik_get_allowed_info_register(struct radeon_device *rdev,
160                                   u32 reg, u32 *val)
161 {
162         switch (reg) {
163         case GRBM_STATUS:
164         case GRBM_STATUS2:
165         case GRBM_STATUS_SE0:
166         case GRBM_STATUS_SE1:
167         case GRBM_STATUS_SE2:
168         case GRBM_STATUS_SE3:
169         case SRBM_STATUS:
170         case SRBM_STATUS2:
171         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
172         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
173         case UVD_STATUS:
174         /* TODO VCE */
175                 *val = RREG32(reg);
176                 return 0;
177         default:
178                 return -EINVAL;
179         }
180 }
181
182 /*
183  * Indirect registers accessor
184  */
185 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
186 {
187         unsigned long flags;
188         u32 r;
189
190         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
191         WREG32(CIK_DIDT_IND_INDEX, (reg));
192         r = RREG32(CIK_DIDT_IND_DATA);
193         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
194         return r;
195 }
196
197 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
198 {
199         unsigned long flags;
200
201         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
202         WREG32(CIK_DIDT_IND_INDEX, (reg));
203         WREG32(CIK_DIDT_IND_DATA, (v));
204         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
205 }
206
207 /* get temperature in millidegrees */
208 int ci_get_temp(struct radeon_device *rdev)
209 {
210         u32 temp;
211         int actual_temp = 0;
212
213         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
214                 CTF_TEMP_SHIFT;
215
216         if (temp & 0x200)
217                 actual_temp = 255;
218         else
219                 actual_temp = temp & 0x1ff;
220
221         actual_temp = actual_temp * 1000;
222
223         return actual_temp;
224 }
225
226 /* get temperature in millidegrees */
227 int kv_get_temp(struct radeon_device *rdev)
228 {
229         u32 temp;
230         int actual_temp = 0;
231
232         temp = RREG32_SMC(0xC0300E0C);
233
234         if (temp)
235                 actual_temp = (temp / 8) - 49;
236         else
237                 actual_temp = 0;
238
239         actual_temp = actual_temp * 1000;
240
241         return actual_temp;
242 }
243
244 /*
245  * Indirect registers accessor
246  */
247 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
248 {
249         unsigned long flags;
250         u32 r;
251
252         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
253         WREG32(PCIE_INDEX, reg);
254         (void)RREG32(PCIE_INDEX);
255         r = RREG32(PCIE_DATA);
256         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
257         return r;
258 }
259
260 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
261 {
262         unsigned long flags;
263
264         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
265         WREG32(PCIE_INDEX, reg);
266         (void)RREG32(PCIE_INDEX);
267         WREG32(PCIE_DATA, v);
268         (void)RREG32(PCIE_DATA);
269         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
270 }
271
272 static const u32 spectre_rlc_save_restore_register_list[] =
273 {
274         (0x0e00 << 16) | (0xc12c >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0xc140 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0xc150 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0xc15c >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0xc168 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0xc170 >> 2),
285         0x00000000,
286         (0x0e00 << 16) | (0xc178 >> 2),
287         0x00000000,
288         (0x0e00 << 16) | (0xc204 >> 2),
289         0x00000000,
290         (0x0e00 << 16) | (0xc2b4 >> 2),
291         0x00000000,
292         (0x0e00 << 16) | (0xc2b8 >> 2),
293         0x00000000,
294         (0x0e00 << 16) | (0xc2bc >> 2),
295         0x00000000,
296         (0x0e00 << 16) | (0xc2c0 >> 2),
297         0x00000000,
298         (0x0e00 << 16) | (0x8228 >> 2),
299         0x00000000,
300         (0x0e00 << 16) | (0x829c >> 2),
301         0x00000000,
302         (0x0e00 << 16) | (0x869c >> 2),
303         0x00000000,
304         (0x0600 << 16) | (0x98f4 >> 2),
305         0x00000000,
306         (0x0e00 << 16) | (0x98f8 >> 2),
307         0x00000000,
308         (0x0e00 << 16) | (0x9900 >> 2),
309         0x00000000,
310         (0x0e00 << 16) | (0xc260 >> 2),
311         0x00000000,
312         (0x0e00 << 16) | (0x90e8 >> 2),
313         0x00000000,
314         (0x0e00 << 16) | (0x3c000 >> 2),
315         0x00000000,
316         (0x0e00 << 16) | (0x3c00c >> 2),
317         0x00000000,
318         (0x0e00 << 16) | (0x8c1c >> 2),
319         0x00000000,
320         (0x0e00 << 16) | (0x9700 >> 2),
321         0x00000000,
322         (0x0e00 << 16) | (0xcd20 >> 2),
323         0x00000000,
324         (0x4e00 << 16) | (0xcd20 >> 2),
325         0x00000000,
326         (0x5e00 << 16) | (0xcd20 >> 2),
327         0x00000000,
328         (0x6e00 << 16) | (0xcd20 >> 2),
329         0x00000000,
330         (0x7e00 << 16) | (0xcd20 >> 2),
331         0x00000000,
332         (0x8e00 << 16) | (0xcd20 >> 2),
333         0x00000000,
334         (0x9e00 << 16) | (0xcd20 >> 2),
335         0x00000000,
336         (0xae00 << 16) | (0xcd20 >> 2),
337         0x00000000,
338         (0xbe00 << 16) | (0xcd20 >> 2),
339         0x00000000,
340         (0x0e00 << 16) | (0x89bc >> 2),
341         0x00000000,
342         (0x0e00 << 16) | (0x8900 >> 2),
343         0x00000000,
344         0x3,
345         (0x0e00 << 16) | (0xc130 >> 2),
346         0x00000000,
347         (0x0e00 << 16) | (0xc134 >> 2),
348         0x00000000,
349         (0x0e00 << 16) | (0xc1fc >> 2),
350         0x00000000,
351         (0x0e00 << 16) | (0xc208 >> 2),
352         0x00000000,
353         (0x0e00 << 16) | (0xc264 >> 2),
354         0x00000000,
355         (0x0e00 << 16) | (0xc268 >> 2),
356         0x00000000,
357         (0x0e00 << 16) | (0xc26c >> 2),
358         0x00000000,
359         (0x0e00 << 16) | (0xc270 >> 2),
360         0x00000000,
361         (0x0e00 << 16) | (0xc274 >> 2),
362         0x00000000,
363         (0x0e00 << 16) | (0xc278 >> 2),
364         0x00000000,
365         (0x0e00 << 16) | (0xc27c >> 2),
366         0x00000000,
367         (0x0e00 << 16) | (0xc280 >> 2),
368         0x00000000,
369         (0x0e00 << 16) | (0xc284 >> 2),
370         0x00000000,
371         (0x0e00 << 16) | (0xc288 >> 2),
372         0x00000000,
373         (0x0e00 << 16) | (0xc28c >> 2),
374         0x00000000,
375         (0x0e00 << 16) | (0xc290 >> 2),
376         0x00000000,
377         (0x0e00 << 16) | (0xc294 >> 2),
378         0x00000000,
379         (0x0e00 << 16) | (0xc298 >> 2),
380         0x00000000,
381         (0x0e00 << 16) | (0xc29c >> 2),
382         0x00000000,
383         (0x0e00 << 16) | (0xc2a0 >> 2),
384         0x00000000,
385         (0x0e00 << 16) | (0xc2a4 >> 2),
386         0x00000000,
387         (0x0e00 << 16) | (0xc2a8 >> 2),
388         0x00000000,
389         (0x0e00 << 16) | (0xc2ac  >> 2),
390         0x00000000,
391         (0x0e00 << 16) | (0xc2b0 >> 2),
392         0x00000000,
393         (0x0e00 << 16) | (0x301d0 >> 2),
394         0x00000000,
395         (0x0e00 << 16) | (0x30238 >> 2),
396         0x00000000,
397         (0x0e00 << 16) | (0x30250 >> 2),
398         0x00000000,
399         (0x0e00 << 16) | (0x30254 >> 2),
400         0x00000000,
401         (0x0e00 << 16) | (0x30258 >> 2),
402         0x00000000,
403         (0x0e00 << 16) | (0x3025c >> 2),
404         0x00000000,
405         (0x4e00 << 16) | (0xc900 >> 2),
406         0x00000000,
407         (0x5e00 << 16) | (0xc900 >> 2),
408         0x00000000,
409         (0x6e00 << 16) | (0xc900 >> 2),
410         0x00000000,
411         (0x7e00 << 16) | (0xc900 >> 2),
412         0x00000000,
413         (0x8e00 << 16) | (0xc900 >> 2),
414         0x00000000,
415         (0x9e00 << 16) | (0xc900 >> 2),
416         0x00000000,
417         (0xae00 << 16) | (0xc900 >> 2),
418         0x00000000,
419         (0xbe00 << 16) | (0xc900 >> 2),
420         0x00000000,
421         (0x4e00 << 16) | (0xc904 >> 2),
422         0x00000000,
423         (0x5e00 << 16) | (0xc904 >> 2),
424         0x00000000,
425         (0x6e00 << 16) | (0xc904 >> 2),
426         0x00000000,
427         (0x7e00 << 16) | (0xc904 >> 2),
428         0x00000000,
429         (0x8e00 << 16) | (0xc904 >> 2),
430         0x00000000,
431         (0x9e00 << 16) | (0xc904 >> 2),
432         0x00000000,
433         (0xae00 << 16) | (0xc904 >> 2),
434         0x00000000,
435         (0xbe00 << 16) | (0xc904 >> 2),
436         0x00000000,
437         (0x4e00 << 16) | (0xc908 >> 2),
438         0x00000000,
439         (0x5e00 << 16) | (0xc908 >> 2),
440         0x00000000,
441         (0x6e00 << 16) | (0xc908 >> 2),
442         0x00000000,
443         (0x7e00 << 16) | (0xc908 >> 2),
444         0x00000000,
445         (0x8e00 << 16) | (0xc908 >> 2),
446         0x00000000,
447         (0x9e00 << 16) | (0xc908 >> 2),
448         0x00000000,
449         (0xae00 << 16) | (0xc908 >> 2),
450         0x00000000,
451         (0xbe00 << 16) | (0xc908 >> 2),
452         0x00000000,
453         (0x4e00 << 16) | (0xc90c >> 2),
454         0x00000000,
455         (0x5e00 << 16) | (0xc90c >> 2),
456         0x00000000,
457         (0x6e00 << 16) | (0xc90c >> 2),
458         0x00000000,
459         (0x7e00 << 16) | (0xc90c >> 2),
460         0x00000000,
461         (0x8e00 << 16) | (0xc90c >> 2),
462         0x00000000,
463         (0x9e00 << 16) | (0xc90c >> 2),
464         0x00000000,
465         (0xae00 << 16) | (0xc90c >> 2),
466         0x00000000,
467         (0xbe00 << 16) | (0xc90c >> 2),
468         0x00000000,
469         (0x4e00 << 16) | (0xc910 >> 2),
470         0x00000000,
471         (0x5e00 << 16) | (0xc910 >> 2),
472         0x00000000,
473         (0x6e00 << 16) | (0xc910 >> 2),
474         0x00000000,
475         (0x7e00 << 16) | (0xc910 >> 2),
476         0x00000000,
477         (0x8e00 << 16) | (0xc910 >> 2),
478         0x00000000,
479         (0x9e00 << 16) | (0xc910 >> 2),
480         0x00000000,
481         (0xae00 << 16) | (0xc910 >> 2),
482         0x00000000,
483         (0xbe00 << 16) | (0xc910 >> 2),
484         0x00000000,
485         (0x0e00 << 16) | (0xc99c >> 2),
486         0x00000000,
487         (0x0e00 << 16) | (0x9834 >> 2),
488         0x00000000,
489         (0x0000 << 16) | (0x30f00 >> 2),
490         0x00000000,
491         (0x0001 << 16) | (0x30f00 >> 2),
492         0x00000000,
493         (0x0000 << 16) | (0x30f04 >> 2),
494         0x00000000,
495         (0x0001 << 16) | (0x30f04 >> 2),
496         0x00000000,
497         (0x0000 << 16) | (0x30f08 >> 2),
498         0x00000000,
499         (0x0001 << 16) | (0x30f08 >> 2),
500         0x00000000,
501         (0x0000 << 16) | (0x30f0c >> 2),
502         0x00000000,
503         (0x0001 << 16) | (0x30f0c >> 2),
504         0x00000000,
505         (0x0600 << 16) | (0x9b7c >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0x8a14 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0x8a18 >> 2),
510         0x00000000,
511         (0x0600 << 16) | (0x30a00 >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0x8bf0 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0x8bcc >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0x8b24 >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0x30a04 >> 2),
520         0x00000000,
521         (0x0600 << 16) | (0x30a10 >> 2),
522         0x00000000,
523         (0x0600 << 16) | (0x30a14 >> 2),
524         0x00000000,
525         (0x0600 << 16) | (0x30a18 >> 2),
526         0x00000000,
527         (0x0600 << 16) | (0x30a2c >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0xc700 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0xc704 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0xc708 >> 2),
534         0x00000000,
535         (0x0e00 << 16) | (0xc768 >> 2),
536         0x00000000,
537         (0x0400 << 16) | (0xc770 >> 2),
538         0x00000000,
539         (0x0400 << 16) | (0xc774 >> 2),
540         0x00000000,
541         (0x0400 << 16) | (0xc778 >> 2),
542         0x00000000,
543         (0x0400 << 16) | (0xc77c >> 2),
544         0x00000000,
545         (0x0400 << 16) | (0xc780 >> 2),
546         0x00000000,
547         (0x0400 << 16) | (0xc784 >> 2),
548         0x00000000,
549         (0x0400 << 16) | (0xc788 >> 2),
550         0x00000000,
551         (0x0400 << 16) | (0xc78c >> 2),
552         0x00000000,
553         (0x0400 << 16) | (0xc798 >> 2),
554         0x00000000,
555         (0x0400 << 16) | (0xc79c >> 2),
556         0x00000000,
557         (0x0400 << 16) | (0xc7a0 >> 2),
558         0x00000000,
559         (0x0400 << 16) | (0xc7a4 >> 2),
560         0x00000000,
561         (0x0400 << 16) | (0xc7a8 >> 2),
562         0x00000000,
563         (0x0400 << 16) | (0xc7ac >> 2),
564         0x00000000,
565         (0x0400 << 16) | (0xc7b0 >> 2),
566         0x00000000,
567         (0x0400 << 16) | (0xc7b4 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0x9100 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0x3c010 >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x92a8 >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x92ac >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x92b4 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x92b8 >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x92bc >> 2),
582         0x00000000,
583         (0x0e00 << 16) | (0x92c0 >> 2),
584         0x00000000,
585         (0x0e00 << 16) | (0x92c4 >> 2),
586         0x00000000,
587         (0x0e00 << 16) | (0x92c8 >> 2),
588         0x00000000,
589         (0x0e00 << 16) | (0x92cc >> 2),
590         0x00000000,
591         (0x0e00 << 16) | (0x92d0 >> 2),
592         0x00000000,
593         (0x0e00 << 16) | (0x8c00 >> 2),
594         0x00000000,
595         (0x0e00 << 16) | (0x8c04 >> 2),
596         0x00000000,
597         (0x0e00 << 16) | (0x8c20 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0x8c38 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0x8c3c >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0xae00 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x9604 >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0xac08 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0xac0c >> 2),
610         0x00000000,
611         (0x0e00 << 16) | (0xac10 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0xac14 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0xac58 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0xac68 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0xac6c >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0xac70 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0xac74 >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0xac78 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0xac7c >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0xac80 >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0xac84 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0xac88 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0xac8c >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x970c >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x9714 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x9718 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x971c >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x31068 >> 2),
646         0x00000000,
647         (0x4e00 << 16) | (0x31068 >> 2),
648         0x00000000,
649         (0x5e00 << 16) | (0x31068 >> 2),
650         0x00000000,
651         (0x6e00 << 16) | (0x31068 >> 2),
652         0x00000000,
653         (0x7e00 << 16) | (0x31068 >> 2),
654         0x00000000,
655         (0x8e00 << 16) | (0x31068 >> 2),
656         0x00000000,
657         (0x9e00 << 16) | (0x31068 >> 2),
658         0x00000000,
659         (0xae00 << 16) | (0x31068 >> 2),
660         0x00000000,
661         (0xbe00 << 16) | (0x31068 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xcd10 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xcd14 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0x88b0 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0x88b4 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0x88b8 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0x88bc >> 2),
674         0x00000000,
675         (0x0400 << 16) | (0x89c0 >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0x88c4 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x88c8 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0x88d0 >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x88d4 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0x88d8 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x8980 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x30938 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0x3093c >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x30940 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x89a0 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x30900 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30904 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x89b4 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x3c210 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x3c214 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3c218 >> 2),
708         0x00000000,
709         (0x0e00 << 16) | (0x8904 >> 2),
710         0x00000000,
711         0x5,
712         (0x0e00 << 16) | (0x8c28 >> 2),
713         (0x0e00 << 16) | (0x8c2c >> 2),
714         (0x0e00 << 16) | (0x8c30 >> 2),
715         (0x0e00 << 16) | (0x8c34 >> 2),
716         (0x0e00 << 16) | (0x9600 >> 2),
717 };
718
719 static const u32 kalindi_rlc_save_restore_register_list[] =
720 {
721         (0x0e00 << 16) | (0xc12c >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0xc140 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc150 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0xc15c >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0xc168 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0xc170 >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0xc204 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0xc2b4 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xc2b8 >> 2),
738         0x00000000,
739         (0x0e00 << 16) | (0xc2bc >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0xc2c0 >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0x8228 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0x829c >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0x869c >> 2),
748         0x00000000,
749         (0x0600 << 16) | (0x98f4 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x98f8 >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x9900 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0xc260 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x90e8 >> 2),
758         0x00000000,
759         (0x0e00 << 16) | (0x3c000 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x3c00c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8c1c >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x9700 >> 2),
766         0x00000000,
767         (0x0e00 << 16) | (0xcd20 >> 2),
768         0x00000000,
769         (0x4e00 << 16) | (0xcd20 >> 2),
770         0x00000000,
771         (0x5e00 << 16) | (0xcd20 >> 2),
772         0x00000000,
773         (0x6e00 << 16) | (0xcd20 >> 2),
774         0x00000000,
775         (0x7e00 << 16) | (0xcd20 >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0x89bc >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0x8900 >> 2),
780         0x00000000,
781         0x3,
782         (0x0e00 << 16) | (0xc130 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc134 >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0xc1fc >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0xc208 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0xc264 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0xc268 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0xc26c >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0xc270 >> 2),
797         0x00000000,
798         (0x0e00 << 16) | (0xc274 >> 2),
799         0x00000000,
800         (0x0e00 << 16) | (0xc28c >> 2),
801         0x00000000,
802         (0x0e00 << 16) | (0xc290 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc294 >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0xc298 >> 2),
807         0x00000000,
808         (0x0e00 << 16) | (0xc2a0 >> 2),
809         0x00000000,
810         (0x0e00 << 16) | (0xc2a4 >> 2),
811         0x00000000,
812         (0x0e00 << 16) | (0xc2a8 >> 2),
813         0x00000000,
814         (0x0e00 << 16) | (0xc2ac >> 2),
815         0x00000000,
816         (0x0e00 << 16) | (0x301d0 >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x30238 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x30250 >> 2),
821         0x00000000,
822         (0x0e00 << 16) | (0x30254 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x30258 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x3025c >> 2),
827         0x00000000,
828         (0x4e00 << 16) | (0xc900 >> 2),
829         0x00000000,
830         (0x5e00 << 16) | (0xc900 >> 2),
831         0x00000000,
832         (0x6e00 << 16) | (0xc900 >> 2),
833         0x00000000,
834         (0x7e00 << 16) | (0xc900 >> 2),
835         0x00000000,
836         (0x4e00 << 16) | (0xc904 >> 2),
837         0x00000000,
838         (0x5e00 << 16) | (0xc904 >> 2),
839         0x00000000,
840         (0x6e00 << 16) | (0xc904 >> 2),
841         0x00000000,
842         (0x7e00 << 16) | (0xc904 >> 2),
843         0x00000000,
844         (0x4e00 << 16) | (0xc908 >> 2),
845         0x00000000,
846         (0x5e00 << 16) | (0xc908 >> 2),
847         0x00000000,
848         (0x6e00 << 16) | (0xc908 >> 2),
849         0x00000000,
850         (0x7e00 << 16) | (0xc908 >> 2),
851         0x00000000,
852         (0x4e00 << 16) | (0xc90c >> 2),
853         0x00000000,
854         (0x5e00 << 16) | (0xc90c >> 2),
855         0x00000000,
856         (0x6e00 << 16) | (0xc90c >> 2),
857         0x00000000,
858         (0x7e00 << 16) | (0xc90c >> 2),
859         0x00000000,
860         (0x4e00 << 16) | (0xc910 >> 2),
861         0x00000000,
862         (0x5e00 << 16) | (0xc910 >> 2),
863         0x00000000,
864         (0x6e00 << 16) | (0xc910 >> 2),
865         0x00000000,
866         (0x7e00 << 16) | (0xc910 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0xc99c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0x9834 >> 2),
871         0x00000000,
872         (0x0000 << 16) | (0x30f00 >> 2),
873         0x00000000,
874         (0x0000 << 16) | (0x30f04 >> 2),
875         0x00000000,
876         (0x0000 << 16) | (0x30f08 >> 2),
877         0x00000000,
878         (0x0000 << 16) | (0x30f0c >> 2),
879         0x00000000,
880         (0x0600 << 16) | (0x9b7c >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0x8a14 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0x8a18 >> 2),
885         0x00000000,
886         (0x0600 << 16) | (0x30a00 >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0x8bf0 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x8bcc >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x8b24 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x30a04 >> 2),
895         0x00000000,
896         (0x0600 << 16) | (0x30a10 >> 2),
897         0x00000000,
898         (0x0600 << 16) | (0x30a14 >> 2),
899         0x00000000,
900         (0x0600 << 16) | (0x30a18 >> 2),
901         0x00000000,
902         (0x0600 << 16) | (0x30a2c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0xc700 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0xc704 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0xc708 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0xc768 >> 2),
911         0x00000000,
912         (0x0400 << 16) | (0xc770 >> 2),
913         0x00000000,
914         (0x0400 << 16) | (0xc774 >> 2),
915         0x00000000,
916         (0x0400 << 16) | (0xc798 >> 2),
917         0x00000000,
918         (0x0400 << 16) | (0xc79c >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0x9100 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0x3c010 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0x8c00 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x8c04 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x8c20 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x8c38 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x8c3c >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xae00 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x9604 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0xac08 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0xac0c >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0xac10 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0xac14 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0xac58 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0xac68 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0xac6c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0xac70 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0xac74 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0xac78 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0xac7c >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0xac80 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0xac84 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0xac88 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0xac8c >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x970c >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x9714 >> 2),
971         0x00000000,
972         (0x0e00 << 16) | (0x9718 >> 2),
973         0x00000000,
974         (0x0e00 << 16) | (0x971c >> 2),
975         0x00000000,
976         (0x0e00 << 16) | (0x31068 >> 2),
977         0x00000000,
978         (0x4e00 << 16) | (0x31068 >> 2),
979         0x00000000,
980         (0x5e00 << 16) | (0x31068 >> 2),
981         0x00000000,
982         (0x6e00 << 16) | (0x31068 >> 2),
983         0x00000000,
984         (0x7e00 << 16) | (0x31068 >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0xcd10 >> 2),
987         0x00000000,
988         (0x0e00 << 16) | (0xcd14 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x88b0 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x88b4 >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x88b8 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x88bc >> 2),
997         0x00000000,
998         (0x0400 << 16) | (0x89c0 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x88c4 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x88c8 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x88d0 >> 2),
1005         0x00000000,
1006         (0x0e00 << 16) | (0x88d4 >> 2),
1007         0x00000000,
1008         (0x0e00 << 16) | (0x88d8 >> 2),
1009         0x00000000,
1010         (0x0e00 << 16) | (0x8980 >> 2),
1011         0x00000000,
1012         (0x0e00 << 16) | (0x30938 >> 2),
1013         0x00000000,
1014         (0x0e00 << 16) | (0x3093c >> 2),
1015         0x00000000,
1016         (0x0e00 << 16) | (0x30940 >> 2),
1017         0x00000000,
1018         (0x0e00 << 16) | (0x89a0 >> 2),
1019         0x00000000,
1020         (0x0e00 << 16) | (0x30900 >> 2),
1021         0x00000000,
1022         (0x0e00 << 16) | (0x30904 >> 2),
1023         0x00000000,
1024         (0x0e00 << 16) | (0x89b4 >> 2),
1025         0x00000000,
1026         (0x0e00 << 16) | (0x3e1fc >> 2),
1027         0x00000000,
1028         (0x0e00 << 16) | (0x3c210 >> 2),
1029         0x00000000,
1030         (0x0e00 << 16) | (0x3c214 >> 2),
1031         0x00000000,
1032         (0x0e00 << 16) | (0x3c218 >> 2),
1033         0x00000000,
1034         (0x0e00 << 16) | (0x8904 >> 2),
1035         0x00000000,
1036         0x5,
1037         (0x0e00 << 16) | (0x8c28 >> 2),
1038         (0x0e00 << 16) | (0x8c2c >> 2),
1039         (0x0e00 << 16) | (0x8c30 >> 2),
1040         (0x0e00 << 16) | (0x8c34 >> 2),
1041         (0x0e00 << 16) | (0x9600 >> 2),
1042 };
1043
/* Bonaire SPM golden register settings. Entries appear to be
 * {reg offset, and-mask, or-value} triples; applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1048
/* Bonaire common golden register settings ({reg, mask, value} triples,
 * presumably {offset, and-mask, or-value}); applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1056
/* Bonaire ASIC-specific golden register settings ({reg, mask, value}
 * triples); applied via radeon_program_register_sequence() in
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1101
/* Bonaire clock-gating init sequence ({reg, mask, value} triples;
 * name suggests medium-grain / coarse-grain clock gating — confirm
 * against CIK register docs). Applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1187
/* Spectre (Kaveri) SPM golden register settings ({reg, mask, value}
 * triples); applied via radeon_program_register_sequence() in
 * cik_init_golden_registers() for CHIP_KAVERI.
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1192
/* Spectre (Kaveri) common golden register settings ({reg, mask, value}
 * triples); same four registers as the other ASICs' common tables.
 */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1200
/* Spectre (Kaveri) ASIC-specific golden register settings
 * ({reg, mask, value} triples); applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1229
/* Spectre (Kaveri) clock-gating init sequence ({reg, mask, value}
 * triples); applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). Mirrors the Bonaire table but with a
 * longer 0x3c0xx run and 0x00000100 (not 0xc0000100) at 0x3c2c0/c4/c8.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1320
/* Kalindi (Kabini) SPM golden register settings ({reg, mask, value}
 * triples); reused for both CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1325
/* Kalindi (Kabini) common golden register settings ({reg, mask, value}
 * triples); reused for both CHIP_KABINI and CHIP_MULLINS.
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1333
/* Kalindi (Kabini) ASIC-specific golden register settings
 * ({reg, mask, value} triples); applied via
 * radeon_program_register_sequence() in cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1367
/* Kalindi (Kabini) clock-gating init sequence ({reg, mask, value}
 * triples); reused for both CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers(). Shorter 0x3c0xx run than Bonaire/Spectre
 * and no 0xf90/0xf98 entries.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1426
/* Hawaii SPM golden register settings ({reg, mask, value} triples);
 * presumably applied in the CHIP_HAWAII case of
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1431
/* Hawaii common golden register settings ({reg, mask, value} triples);
 * note this table differs from the other ASICs' common tables.
 */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1440
/* Hawaii ASIC-specific golden register settings ({reg, mask, value}
 * triples); presumably applied in the CHIP_HAWAII case of
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1480
/* Hawaii clock-gating init sequence ({reg, mask, value} triples);
 * used in the CHIP_HAWAII case of cik_init_golden_registers().
 * Longest 0x3c0xx run of the family, plus extra entries
 * (0xc318, 0x3350, 0x15c0, 0x55e8, 0x2f50) the other tables lack.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1591
/* Godavari (Mullins) ASIC-specific golden register settings
 * ({reg, mask, value} triples); applied in the CHIP_MULLINS case of
 * cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        /* NOTE(review): 0x98302 is not dword-aligned and the parallel
         * kalindi_golden_registers entry at this position is
         * "0x9834, 0xf00fffff, 0x00000400" — looks like a typo for
         * 0x9834; confirm against CIK register docs before changing.
         */
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1627
1628
1629 static void cik_init_golden_registers(struct radeon_device *rdev)
1630 {
1631         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1632         mutex_lock(&rdev->grbm_idx_mutex);
1633         switch (rdev->family) {
1634         case CHIP_BONAIRE:
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_mgcg_cgcg_init,
1637                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_common_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1644                 radeon_program_register_sequence(rdev,
1645                                                  bonaire_golden_spm_registers,
1646                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1647                 break;
1648         case CHIP_KABINI:
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_mgcg_cgcg_init,
1651                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_common_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_golden_spm_registers,
1660                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1661                 break;
1662         case CHIP_MULLINS:
1663                 radeon_program_register_sequence(rdev,
1664                                                  kalindi_mgcg_cgcg_init,
1665                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1666                 radeon_program_register_sequence(rdev,
1667                                                  godavari_golden_registers,
1668                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_common_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1672                 radeon_program_register_sequence(rdev,
1673                                                  kalindi_golden_spm_registers,
1674                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1675                 break;
1676         case CHIP_KAVERI:
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_mgcg_cgcg_init,
1679                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_common_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1686                 radeon_program_register_sequence(rdev,
1687                                                  spectre_golden_spm_registers,
1688                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1689                 break;
1690         case CHIP_HAWAII:
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_mgcg_cgcg_init,
1693                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_common_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1700                 radeon_program_register_sequence(rdev,
1701                                                  hawaii_golden_spm_registers,
1702                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1703                 break;
1704         default:
1705                 break;
1706         }
1707         mutex_unlock(&rdev->grbm_idx_mutex);
1708 }
1709
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720         u32 reference_clock = rdev->clock.spll.reference_freq;
1721
1722         if (rdev->flags & RADEON_IS_IGP) {
1723                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724                         return reference_clock / 2;
1725         } else {
1726                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727                         return reference_clock / 4;
1728         }
1729         return reference_clock;
1730 }
1731
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743         if (index < rdev->doorbell.num_doorbells) {
1744                 return readl(rdev->doorbell.ptr + index);
1745         } else {
1746                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747                 return 0;
1748         }
1749 }
1750
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763         if (index < rdev->doorbell.num_doorbells) {
1764                 writel(v, rdev->doorbell.ptr + index);
1765         } else {
1766                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767         }
1768 }
1769
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug register init table: {index, value} pairs written
 * to MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode is loaded (legacy, non-"new_fw" firmware path only;
 * new firmware images carry their own table in the blob).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1811
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io debug register init table: {index, value} pairs written
 * to MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode is loaded (legacy, non-"new_fw" firmware path only).
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1839
1840
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855                             u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858                              MEID(me & 0x3) |
1859                              VMID(vmid & 0xf) |
1860                              QUEUEID(queue & 0x7));
1861         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	/* nothing to do without an MC firmware image */
	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: header in the blob describes where
		 * the io debug table and the ucode itself live */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug table is {index, data} pairs, 2 dwords per entry */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: whole blob is the ucode (big endian),
		 * io debug table comes from the static per-asic arrays */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only program the engine if it is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* device/memory specific quirk for PCI id 0x6649 —
		 * presumably a board-specific MC tweak; exact meaning of the
		 * MC_SEQ_MISC0 check is not visible here (NOTE: review) */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode; new images are little endian,
		 * legacy images are big endian */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1969
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981         const char *chip_name;
1982         const char *new_chip_name;
1983         size_t pfp_req_size, me_req_size, ce_req_size,
1984                 mec_req_size, rlc_req_size, mc_req_size = 0,
1985                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986         char fw_name[30];
1987         int new_fw = 0;
1988         int err;
1989         int num_fw;
1990         bool new_smc = false;
1991
1992         DRM_DEBUG("\n");
1993
1994         switch (rdev->family) {
1995         case CHIP_BONAIRE:
1996                 chip_name = "BONAIRE";
1997                 if ((rdev->pdev->revision == 0x80) ||
1998                     (rdev->pdev->revision == 0x81) ||
1999                     (rdev->pdev->device == 0x665f))
2000                         new_smc = true;
2001                 new_chip_name = "bonaire";
2002                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2004                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011                 num_fw = 8;
2012                 break;
2013         case CHIP_HAWAII:
2014                 chip_name = "HAWAII";
2015                 if (rdev->pdev->revision == 0x80)
2016                         new_smc = true;
2017                 new_chip_name = "hawaii";
2018                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2020                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027                 num_fw = 8;
2028                 break;
2029         case CHIP_KAVERI:
2030                 chip_name = "KAVERI";
2031                 new_chip_name = "kaveri";
2032                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2034                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038                 num_fw = 7;
2039                 break;
2040         case CHIP_KABINI:
2041                 chip_name = "KABINI";
2042                 new_chip_name = "kabini";
2043                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2045                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049                 num_fw = 6;
2050                 break;
2051         case CHIP_MULLINS:
2052                 chip_name = "MULLINS";
2053                 new_chip_name = "mullins";
2054                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2056                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060                 num_fw = 6;
2061                 break;
2062         default: BUG();
2063         }
2064
2065         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069         if (err) {
2070                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072                 if (err)
2073                         goto out;
2074                 if (rdev->pfp_fw->size != pfp_req_size) {
2075                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076                                rdev->pfp_fw->size, fw_name);
2077                         err = -EINVAL;
2078                         goto out;
2079                 }
2080         } else {
2081                 err = radeon_ucode_validate(rdev->pfp_fw);
2082                 if (err) {
2083                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084                                fw_name);
2085                         goto out;
2086                 } else {
2087                         new_fw++;
2088                 }
2089         }
2090
2091         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093         if (err) {
2094                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096                 if (err)
2097                         goto out;
2098                 if (rdev->me_fw->size != me_req_size) {
2099                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                                rdev->me_fw->size, fw_name);
2101                         err = -EINVAL;
2102                 }
2103         } else {
2104                 err = radeon_ucode_validate(rdev->me_fw);
2105                 if (err) {
2106                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123                                rdev->ce_fw->size, fw_name);
2124                         err = -EINVAL;
2125                 }
2126         } else {
2127                 err = radeon_ucode_validate(rdev->ce_fw);
2128                 if (err) {
2129                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130                                fw_name);
2131                         goto out;
2132                 } else {
2133                         new_fw++;
2134                 }
2135         }
2136
2137         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139         if (err) {
2140                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142                 if (err)
2143                         goto out;
2144                 if (rdev->mec_fw->size != mec_req_size) {
2145                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146                                rdev->mec_fw->size, fw_name);
2147                         err = -EINVAL;
2148                 }
2149         } else {
2150                 err = radeon_ucode_validate(rdev->mec_fw);
2151                 if (err) {
2152                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153                                fw_name);
2154                         goto out;
2155                 } else {
2156                         new_fw++;
2157                 }
2158         }
2159
2160         if (rdev->family == CHIP_KAVERI) {
2161                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163                 if (err) {
2164                         goto out;
2165                 } else {
2166                         err = radeon_ucode_validate(rdev->mec2_fw);
2167                         if (err) {
2168                                 goto out;
2169                         } else {
2170                                 new_fw++;
2171                         }
2172                 }
2173         }
2174
2175         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177         if (err) {
2178                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180                 if (err)
2181                         goto out;
2182                 if (rdev->rlc_fw->size != rlc_req_size) {
2183                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184                                rdev->rlc_fw->size, fw_name);
2185                         err = -EINVAL;
2186                 }
2187         } else {
2188                 err = radeon_ucode_validate(rdev->rlc_fw);
2189                 if (err) {
2190                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191                                fw_name);
2192                         goto out;
2193                 } else {
2194                         new_fw++;
2195                 }
2196         }
2197
2198         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200         if (err) {
2201                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203                 if (err)
2204                         goto out;
2205                 if (rdev->sdma_fw->size != sdma_req_size) {
2206                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207                                rdev->sdma_fw->size, fw_name);
2208                         err = -EINVAL;
2209                 }
2210         } else {
2211                 err = radeon_ucode_validate(rdev->sdma_fw);
2212                 if (err) {
2213                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214                                fw_name);
2215                         goto out;
2216                 } else {
2217                         new_fw++;
2218                 }
2219         }
2220
2221         /* No SMC, MC ucode on APUs */
2222         if (!(rdev->flags & RADEON_IS_IGP)) {
2223                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225                 if (err) {
2226                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228                         if (err) {
2229                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231                                 if (err)
2232                                         goto out;
2233                         }
2234                         if ((rdev->mc_fw->size != mc_req_size) &&
2235                             (rdev->mc_fw->size != mc2_req_size)){
2236                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237                                        rdev->mc_fw->size, fw_name);
2238                                 err = -EINVAL;
2239                         }
2240                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241                 } else {
2242                         err = radeon_ucode_validate(rdev->mc_fw);
2243                         if (err) {
2244                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245                                        fw_name);
2246                                 goto out;
2247                         } else {
2248                                 new_fw++;
2249                         }
2250                 }
2251
2252                 if (new_smc)
2253                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254                 else
2255                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257                 if (err) {
2258                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260                         if (err) {
2261                                 pr_err("smc: error loading firmware \"%s\"\n",
2262                                        fw_name);
2263                                 release_firmware(rdev->smc_fw);
2264                                 rdev->smc_fw = NULL;
2265                                 err = 0;
2266                         } else if (rdev->smc_fw->size != smc_req_size) {
2267                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268                                        rdev->smc_fw->size, fw_name);
2269                                 err = -EINVAL;
2270                         }
2271                 } else {
2272                         err = radeon_ucode_validate(rdev->smc_fw);
2273                         if (err) {
2274                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275                                        fw_name);
2276                                 goto out;
2277                         } else {
2278                                 new_fw++;
2279                         }
2280                 }
2281         }
2282
2283         if (new_fw == 0) {
2284                 rdev->new_fw = false;
2285         } else if (new_fw < num_fw) {
2286                 pr_err("ci_fw: mixing new and old firmware!\n");
2287                 err = -EINVAL;
2288         } else {
2289                 rdev->new_fw = true;
2290         }
2291
2292 out:
2293         if (err) {
2294                 if (err != -EINVAL)
2295                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296                                fw_name);
2297                 release_firmware(rdev->pfp_fw);
2298                 rdev->pfp_fw = NULL;
2299                 release_firmware(rdev->me_fw);
2300                 rdev->me_fw = NULL;
2301                 release_firmware(rdev->ce_fw);
2302                 rdev->ce_fw = NULL;
2303                 release_firmware(rdev->mec_fw);
2304                 rdev->mec_fw = NULL;
2305                 release_firmware(rdev->mec2_fw);
2306                 rdev->mec2_fw = NULL;
2307                 release_firmware(rdev->rlc_fw);
2308                 rdev->rlc_fw = NULL;
2309                 release_firmware(rdev->sdma_fw);
2310                 rdev->sdma_fw = NULL;
2311                 release_firmware(rdev->mc_fw);
2312                 rdev->mc_fw = NULL;
2313                 release_firmware(rdev->smc_fw);
2314                 rdev->smc_fw = NULL;
2315         }
2316         return err;
2317 }
2318
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older asics, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335         u32 *tile = rdev->config.cik.tile_mode_array;
2336         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337         const u32 num_tile_mode_states =
2338                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339         const u32 num_secondary_tile_mode_states =
2340                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341         u32 reg_offset, split_equal_to_row_size;
2342         u32 num_pipe_configs;
2343         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344                 rdev->config.cik.max_shader_engines;
2345
2346         switch (rdev->config.cik.mem_row_size_in_kb) {
2347         case 1:
2348                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349                 break;
2350         case 2:
2351         default:
2352                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353                 break;
2354         case 4:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356                 break;
2357         }
2358
2359         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360         if (num_pipe_configs > 8)
2361                 num_pipe_configs = 16;
2362
2363         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364                 tile[reg_offset] = 0;
2365         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366                 macrotile[reg_offset] = 0;
2367
2368         switch(num_pipe_configs) {
2369         case 16:
2370                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                            TILE_SPLIT(split_equal_to_row_size));
2390                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(split_equal_to_row_size));
2401                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                            NUM_BANKS(ADDR_SURF_16_BANK));
2453                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                            NUM_BANKS(ADDR_SURF_16_BANK));
2457                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                            NUM_BANKS(ADDR_SURF_16_BANK));
2461                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                            NUM_BANKS(ADDR_SURF_16_BANK));
2465                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                            NUM_BANKS(ADDR_SURF_8_BANK));
2469                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                            NUM_BANKS(ADDR_SURF_4_BANK));
2473                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                            NUM_BANKS(ADDR_SURF_2_BANK));
2477                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                            NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                             NUM_BANKS(ADDR_SURF_16_BANK));
2489                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                             NUM_BANKS(ADDR_SURF_8_BANK));
2493                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                             NUM_BANKS(ADDR_SURF_4_BANK));
2497                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                             NUM_BANKS(ADDR_SURF_2_BANK));
2501                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504                             NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510                 break;
2511
2512         case 8:
2513                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                            TILE_SPLIT(split_equal_to_row_size));
2533                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(split_equal_to_row_size));
2544                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607                                 NUM_BANKS(ADDR_SURF_16_BANK));
2608                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                 NUM_BANKS(ADDR_SURF_8_BANK));
2612                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                 NUM_BANKS(ADDR_SURF_4_BANK));
2616                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619                                 NUM_BANKS(ADDR_SURF_2_BANK));
2620                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635                                 NUM_BANKS(ADDR_SURF_16_BANK));
2636                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_8_BANK));
2640                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_4_BANK));
2644                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647                                 NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653                 break;
2654
2655         case 4:
2656                 if (num_rbs == 4) {
2657                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                            TILE_SPLIT(split_equal_to_row_size));
2677                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(split_equal_to_row_size));
2688                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736                 } else if (num_rbs < 4) {
2737                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                            TILE_SPLIT(split_equal_to_row_size));
2757                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(split_equal_to_row_size));
2768                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 }
2816
2817                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_8_BANK));
2841                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844                                 NUM_BANKS(ADDR_SURF_4_BANK));
2845                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868                                 NUM_BANKS(ADDR_SURF_8_BANK));
2869                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872                                 NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878                 break;
2879
2880         case 2:
2881                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                            PIPE_CONFIG(ADDR_SURF_P2) |
2884                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                            PIPE_CONFIG(ADDR_SURF_P2) |
2888                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                            PIPE_CONFIG(ADDR_SURF_P2) |
2892                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                            PIPE_CONFIG(ADDR_SURF_P2) |
2896                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899                            PIPE_CONFIG(ADDR_SURF_P2) |
2900                            TILE_SPLIT(split_equal_to_row_size));
2901                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(split_equal_to_row_size));
2912                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913                            PIPE_CONFIG(ADDR_SURF_P2);
2914                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                            PIPE_CONFIG(ADDR_SURF_P2));
2917                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                             PIPE_CONFIG(ADDR_SURF_P2) |
2920                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                             PIPE_CONFIG(ADDR_SURF_P2) |
2924                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                             PIPE_CONFIG(ADDR_SURF_P2) |
2928                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946                             PIPE_CONFIG(ADDR_SURF_P2));
2947                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                 NUM_BANKS(ADDR_SURF_16_BANK));
2980                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                 NUM_BANKS(ADDR_SURF_16_BANK));
2984                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                 NUM_BANKS(ADDR_SURF_8_BANK));
2988                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011                                 NUM_BANKS(ADDR_SURF_16_BANK));
3012                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015                                 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021                 break;
3022
3023         default:
3024                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025         }
3026 }
3027
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040                              u32 se_num, u32 sh_num)
3041 {
3042         u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046         else if (se_num == 0xffffffff)
3047                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048         else if (sh_num == 0xffffffff)
3049                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050         else
3051                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052         WREG32(GRBM_GFX_INDEX, data);
3053 }
3054
3055 /**
3056  * cik_create_bitmask - create a bitmask
3057  *
3058  * @bit_width: length of the mask
3059  *
3060  * create a variable length bit mask (CIK).
3061  * Returns the bitmask.
3062  */
3063 static u32 cik_create_bitmask(u32 bit_width)
3064 {
3065         u32 i, mask = 0;
3066
3067         for (i = 0; i < bit_width; i++) {
3068                 mask <<= 1;
3069                 mask |= 1;
3070         }
3071         return mask;
3072 }
3073
3074 /**
3075  * cik_get_rb_disabled - computes the mask of disabled RBs
3076  *
3077  * @rdev: radeon_device pointer
3078  * @max_rb_num: max RBs (render backends) for the asic
3079  * @se_num: number of SEs (shader engines) for the asic
3080  * @sh_per_se: number of SH blocks per SE for the asic
3081  *
3082  * Calculates the bitmask of disabled RBs (CIK).
3083  * Returns the disabled RB bitmask.
3084  */
3085 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086                               u32 max_rb_num_per_se,
3087                               u32 sh_per_se)
3088 {
3089         u32 data, mask;
3090
3091         data = RREG32(CC_RB_BACKEND_DISABLE);
3092         if (data & 1)
3093                 data &= BACKEND_DISABLE_MASK;
3094         else
3095                 data = 0;
3096         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097
3098         data >>= BACKEND_DISABLE_SHIFT;
3099
3100         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101
3102         return data & mask;
3103 }
3104
3105 /**
3106  * cik_setup_rb - setup the RBs on the asic
3107  *
3108  * @rdev: radeon_device pointer
3109  * @se_num: number of SEs (shader engines) for the asic
3110  * @sh_per_se: number of SH blocks per SE for the asic
3111  * @max_rb_num: max RBs (render backends) for the asic
3112  *
3113  * Configures per-SE/SH RB registers (CIK).
3114  */
3115 static void cik_setup_rb(struct radeon_device *rdev,
3116                          u32 se_num, u32 sh_per_se,
3117                          u32 max_rb_num_per_se)
3118 {
3119         int i, j;
3120         u32 data, mask;
3121         u32 disabled_rbs = 0;
3122         u32 enabled_rbs = 0;
3123
3124         mutex_lock(&rdev->grbm_idx_mutex);
3125         for (i = 0; i < se_num; i++) {
3126                 for (j = 0; j < sh_per_se; j++) {
3127                         cik_select_se_sh(rdev, i, j);
3128                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3129                         if (rdev->family == CHIP_HAWAII)
3130                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3131                         else
3132                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3133                 }
3134         }
3135         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3136         mutex_unlock(&rdev->grbm_idx_mutex);
3137
3138         mask = 1;
3139         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3140                 if (!(disabled_rbs & mask))
3141                         enabled_rbs |= mask;
3142                 mask <<= 1;
3143         }
3144
3145         rdev->config.cik.backend_enable_mask = enabled_rbs;
3146
3147         mutex_lock(&rdev->grbm_idx_mutex);
3148         for (i = 0; i < se_num; i++) {
3149                 cik_select_se_sh(rdev, i, 0xffffffff);
3150                 data = 0;
3151                 for (j = 0; j < sh_per_se; j++) {
3152                         switch (enabled_rbs & 3) {
3153                         case 0:
3154                                 if (j == 0)
3155                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3156                                 else
3157                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3158                                 break;
3159                         case 1:
3160                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3161                                 break;
3162                         case 2:
3163                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3164                                 break;
3165                         case 3:
3166                         default:
3167                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3168                                 break;
3169                         }
3170                         enabled_rbs >>= 2;
3171                 }
3172                 WREG32(PA_SC_RASTER_CONFIG, data);
3173         }
3174         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3175         mutex_unlock(&rdev->grbm_idx_mutex);
3176 }
3177
3178 /**
3179  * cik_gpu_init - setup the 3D engine
3180  *
3181  * @rdev: radeon_device pointer
3182  *
3183  * Configures the 3D engine and tiling configuration
3184  * registers so that the 3D engine is usable.
3185  */
3186 static void cik_gpu_init(struct radeon_device *rdev)
3187 {
3188         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3189         u32 mc_shared_chmap, mc_arb_ramcfg;
3190         u32 hdp_host_path_cntl;
3191         u32 tmp;
3192         int i, j;
3193
3194         switch (rdev->family) {
3195         case CHIP_BONAIRE:
3196                 rdev->config.cik.max_shader_engines = 2;
3197                 rdev->config.cik.max_tile_pipes = 4;
3198                 rdev->config.cik.max_cu_per_sh = 7;
3199                 rdev->config.cik.max_sh_per_se = 1;
3200                 rdev->config.cik.max_backends_per_se = 2;
3201                 rdev->config.cik.max_texture_channel_caches = 4;
3202                 rdev->config.cik.max_gprs = 256;
3203                 rdev->config.cik.max_gs_threads = 32;
3204                 rdev->config.cik.max_hw_contexts = 8;
3205
3206                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3207                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3208                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3209                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3210                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3211                 break;
3212         case CHIP_HAWAII:
3213                 rdev->config.cik.max_shader_engines = 4;
3214                 rdev->config.cik.max_tile_pipes = 16;
3215                 rdev->config.cik.max_cu_per_sh = 11;
3216                 rdev->config.cik.max_sh_per_se = 1;
3217                 rdev->config.cik.max_backends_per_se = 4;
3218                 rdev->config.cik.max_texture_channel_caches = 16;
3219                 rdev->config.cik.max_gprs = 256;
3220                 rdev->config.cik.max_gs_threads = 32;
3221                 rdev->config.cik.max_hw_contexts = 8;
3222
3223                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3224                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3225                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3226                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3227                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3228                 break;
3229         case CHIP_KAVERI:
3230                 rdev->config.cik.max_shader_engines = 1;
3231                 rdev->config.cik.max_tile_pipes = 4;
3232                 if ((rdev->pdev->device == 0x1304) ||
3233                     (rdev->pdev->device == 0x1305) ||
3234                     (rdev->pdev->device == 0x130C) ||
3235                     (rdev->pdev->device == 0x130F) ||
3236                     (rdev->pdev->device == 0x1310) ||
3237                     (rdev->pdev->device == 0x1311) ||
3238                     (rdev->pdev->device == 0x131C)) {
3239                         rdev->config.cik.max_cu_per_sh = 8;
3240                         rdev->config.cik.max_backends_per_se = 2;
3241                 } else if ((rdev->pdev->device == 0x1309) ||
3242                            (rdev->pdev->device == 0x130A) ||
3243                            (rdev->pdev->device == 0x130D) ||
3244                            (rdev->pdev->device == 0x1313) ||
3245                            (rdev->pdev->device == 0x131D)) {
3246                         rdev->config.cik.max_cu_per_sh = 6;
3247                         rdev->config.cik.max_backends_per_se = 2;
3248                 } else if ((rdev->pdev->device == 0x1306) ||
3249                            (rdev->pdev->device == 0x1307) ||
3250                            (rdev->pdev->device == 0x130B) ||
3251                            (rdev->pdev->device == 0x130E) ||
3252                            (rdev->pdev->device == 0x1315) ||
3253                            (rdev->pdev->device == 0x1318) ||
3254                            (rdev->pdev->device == 0x131B)) {
3255                         rdev->config.cik.max_cu_per_sh = 4;
3256                         rdev->config.cik.max_backends_per_se = 1;
3257                 } else {
3258                         rdev->config.cik.max_cu_per_sh = 3;
3259                         rdev->config.cik.max_backends_per_se = 1;
3260                 }
3261                 rdev->config.cik.max_sh_per_se = 1;
3262                 rdev->config.cik.max_texture_channel_caches = 4;
3263                 rdev->config.cik.max_gprs = 256;
3264                 rdev->config.cik.max_gs_threads = 16;
3265                 rdev->config.cik.max_hw_contexts = 8;
3266
3267                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3268                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3269                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3270                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3271                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3272                 break;
3273         case CHIP_KABINI:
3274         case CHIP_MULLINS:
3275         default:
3276                 rdev->config.cik.max_shader_engines = 1;
3277                 rdev->config.cik.max_tile_pipes = 2;
3278                 rdev->config.cik.max_cu_per_sh = 2;
3279                 rdev->config.cik.max_sh_per_se = 1;
3280                 rdev->config.cik.max_backends_per_se = 1;
3281                 rdev->config.cik.max_texture_channel_caches = 2;
3282                 rdev->config.cik.max_gprs = 256;
3283                 rdev->config.cik.max_gs_threads = 16;
3284                 rdev->config.cik.max_hw_contexts = 8;
3285
3286                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3287                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3288                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3289                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3290                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3291                 break;
3292         }
3293
3294         /* Initialize HDP */
3295         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3296                 WREG32((0x2c14 + j), 0x00000000);
3297                 WREG32((0x2c18 + j), 0x00000000);
3298                 WREG32((0x2c1c + j), 0x00000000);
3299                 WREG32((0x2c20 + j), 0x00000000);
3300                 WREG32((0x2c24 + j), 0x00000000);
3301         }
3302
3303         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3304         WREG32(SRBM_INT_CNTL, 0x1);
3305         WREG32(SRBM_INT_ACK, 0x1);
3306
3307         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3308
3309         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3310         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3311
3312         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3313         rdev->config.cik.mem_max_burst_length_bytes = 256;
3314         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3315         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3316         if (rdev->config.cik.mem_row_size_in_kb > 4)
3317                 rdev->config.cik.mem_row_size_in_kb = 4;
3318         /* XXX use MC settings? */
3319         rdev->config.cik.shader_engine_tile_size = 32;
3320         rdev->config.cik.num_gpus = 1;
3321         rdev->config.cik.multi_gpu_tile_size = 64;
3322
3323         /* fix up row size */
3324         gb_addr_config &= ~ROW_SIZE_MASK;
3325         switch (rdev->config.cik.mem_row_size_in_kb) {
3326         case 1:
3327         default:
3328                 gb_addr_config |= ROW_SIZE(0);
3329                 break;
3330         case 2:
3331                 gb_addr_config |= ROW_SIZE(1);
3332                 break;
3333         case 4:
3334                 gb_addr_config |= ROW_SIZE(2);
3335                 break;
3336         }
3337
3338         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3339          * not have bank info, so create a custom tiling dword.
3340          * bits 3:0   num_pipes
3341          * bits 7:4   num_banks
3342          * bits 11:8  group_size
3343          * bits 15:12 row_size
3344          */
3345         rdev->config.cik.tile_config = 0;
3346         switch (rdev->config.cik.num_tile_pipes) {
3347         case 1:
3348                 rdev->config.cik.tile_config |= (0 << 0);
3349                 break;
3350         case 2:
3351                 rdev->config.cik.tile_config |= (1 << 0);
3352                 break;
3353         case 4:
3354                 rdev->config.cik.tile_config |= (2 << 0);
3355                 break;
3356         case 8:
3357         default:
3358                 /* XXX what about 12? */
3359                 rdev->config.cik.tile_config |= (3 << 0);
3360                 break;
3361         }
3362         rdev->config.cik.tile_config |=
3363                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3364         rdev->config.cik.tile_config |=
3365                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3366         rdev->config.cik.tile_config |=
3367                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3368
3369         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3370         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3371         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3372         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3373         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3374         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3375         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3376         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3377
3378         cik_tiling_mode_table_init(rdev);
3379
3380         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3381                      rdev->config.cik.max_sh_per_se,
3382                      rdev->config.cik.max_backends_per_se);
3383
3384         rdev->config.cik.active_cus = 0;
3385         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3386                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3387                         rdev->config.cik.active_cus +=
3388                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3389                 }
3390         }
3391
3392         /* set HW defaults for 3D engine */
3393         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3394
3395         mutex_lock(&rdev->grbm_idx_mutex);
3396         /*
3397          * making sure that the following register writes will be broadcasted
3398          * to all the shaders
3399          */
3400         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3401         WREG32(SX_DEBUG_1, 0x20);
3402
3403         WREG32(TA_CNTL_AUX, 0x00010000);
3404
3405         tmp = RREG32(SPI_CONFIG_CNTL);
3406         tmp |= 0x03000000;
3407         WREG32(SPI_CONFIG_CNTL, tmp);
3408
3409         WREG32(SQ_CONFIG, 1);
3410
3411         WREG32(DB_DEBUG, 0);
3412
3413         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3414         tmp |= 0x00000400;
3415         WREG32(DB_DEBUG2, tmp);
3416
3417         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3418         tmp |= 0x00020200;
3419         WREG32(DB_DEBUG3, tmp);
3420
3421         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3422         tmp |= 0x00018208;
3423         WREG32(CB_HW_CONTROL, tmp);
3424
3425         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3426
3427         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3428                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3429                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3430                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3431
3432         WREG32(VGT_NUM_INSTANCES, 1);
3433
3434         WREG32(CP_PERFMON_CNTL, 0);
3435
3436         WREG32(SQ_CONFIG, 0);
3437
3438         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3439                                           FORCE_EOV_MAX_REZ_CNT(255)));
3440
3441         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3442                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3443
3444         WREG32(VGT_GS_VERTEX_REUSE, 16);
3445         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3446
3447         tmp = RREG32(HDP_MISC_CNTL);
3448         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3449         WREG32(HDP_MISC_CNTL, tmp);
3450
3451         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3452         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3453
3454         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3455         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3456         mutex_unlock(&rdev->grbm_idx_mutex);
3457
3458         udelay(50);
3459 }
3460
3461 /*
3462  * GPU scratch registers helpers function.
3463  */
3464 /**
3465  * cik_scratch_init - setup driver info for CP scratch regs
3466  *
3467  * @rdev: radeon_device pointer
3468  *
3469  * Set up the number and offset of the CP scratch registers.
3470  * NOTE: use of CP scratch registers is a legacy interface and
3471  * is not used by default on newer asics (r6xx+).  On newer asics,
3472  * memory buffers are used for fences rather than scratch regs.
3473  */
3474 static void cik_scratch_init(struct radeon_device *rdev)
3475 {
3476         int i;
3477
3478         rdev->scratch.num_reg = 7;
3479         rdev->scratch.reg_base = SCRATCH_REG0;
3480         for (i = 0; i < rdev->scratch.num_reg; i++) {
3481                 rdev->scratch.free[i] = true;
3482                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3483         }
3484 }
3485
3486 /**
3487  * cik_ring_test - basic gfx ring test
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ring: radeon_ring structure holding ring information
3491  *
3492  * Allocate a scratch register and write to it using the gfx ring (CIK).
3493  * Provides a basic gfx ring test to verify that the ring is working.
3494  * Used by cik_cp_gfx_resume();
3495  * Returns 0 on success, error on failure.
3496  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a free CP scratch register to use as the test target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed with a sentinel so we can tell if the CP write landed. */
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: SET_UCONFIG_REG header, register offset, value. */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll the scratch register until the CP has executed the write
	 * or rdev->usec_timeout microseconds elapse.
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3537
3538 /**
3539  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3540  *
3541  * @rdev: radeon_device pointer
3542  * @ridx: radeon ring index
3543  *
3544  * Emits an hdp flush on the cp.
3545  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH ref/mask bit that corresponds to the
	 * requesting client: CP0 for the gfx ring; for compute rings the
	 * bit depends on which MEC (ring->me) and pipe issued the request.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* no valid client bit for this ME; skip the flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: request the HDP flush
	 * then poll GPU_HDP_FLUSH_DONE until our bit is set.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3582
3583 /**
3584  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3585  *
3586  * @rdev: radeon_device pointer
3587  * @fence: radeon fence object
3588  *
3589  * Emits a fence sequence number on the gfx ring and flushes
3590  * GPU caches.
3591  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot in the writeback buffer */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write 32bit seq, INT_SEL(0) = no interrupt for
	 * the dummy event
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2) = write seq and raise an interrupt when done */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3623
3624 /**
3625  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3626  *
3627  * @rdev: radeon_device pointer
3628  * @fence: radeon fence object
3629  *
3630  * Emits a fence sequence number on the compute ring and flushes
3631  * GPU caches.
3632  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot in the writeback buffer */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32bit seq, INT_SEL(2) = raise interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3651
3652 /**
3653  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3654  *
3655  * @rdev: radeon_device pointer
3656  * @ring: radeon ring buffer object
3657  * @semaphore: radeon semaphore object
3658  * @emit_wait: Is this a semaphore wait?
3659  *
3660  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3661  * from running ahead of semaphore waits.
3662  */
3663 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3664                              struct radeon_ring *ring,
3665                              struct radeon_semaphore *semaphore,
3666                              bool emit_wait)
3667 {
3668         uint64_t addr = semaphore->gpu_addr;
3669         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3670
3671         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3672         radeon_ring_write(ring, lower_32_bits(addr));
3673         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3674
3675         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3676                 /* Prevent the PFP from running ahead of the semaphore wait */
3677                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3678                 radeon_ring_write(ring, 0x0);
3679         }
3680
3681         return true;
3682 }
3683
3684 /**
3685  * cik_copy_cpdma - copy pages using the CP DMA engine
3686  *
3687  * @rdev: radeon_device pointer
3688  * @src_offset: src GPU address
3689  * @dst_offset: dst GPU address
3690  * @num_gpu_pages: number of GPU pages to xfer
3691  * @resv: reservation object to sync to
3692  *
3693  * Copy GPU paging using the CP DMA engine (CIK+).
3694  * Used by the radeon ttm implementation to move pages if
3695  * registered as the asic copy callback.
3696  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* A single DMA_DATA packet can move at most 0x1fffff bytes, so
	 * split the copy into that many chunks.  Each chunk costs 7 ring
	 * dwords; reserve 18 extra for sync/fence emission.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* Wait for any fences attached to the reservation object first. */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last chunk needs to synchronize with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll back the packets written above */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3755
3756 /*
3757  * IB stuff
3758  */
3759 /**
3760  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3761  *
3762  * @rdev: radeon_device pointer
3763  * @ib: radeon indirect buffer object
3764  *
3765  * Emits a DE (drawing engine) or CE (constant engine) IB
3766  * on the gfx ring.  IBs are usually generated by userspace
3767  * acceleration drivers and submitted to the kernel for
3768  * scheduling on the ring.  This function schedules the IB
3769  * on the gfx ring for execution by the GPU.
3770  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VMID 0 is the kernel context; user VMs get their own id */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3810
3811 /**
3812  * cik_ib_test - basic gfx ring IB test
3813  *
3814  * @rdev: radeon_device pointer
3815  * @ring: radeon_ring structure holding ring information
3816  *
3817  * Allocate an IB and execute it on the gfx ring (CIK).
3818  * Provides a basic gfx ring test to verify that IBs are working.
3819  * Returns 0 on success, error on failure.
3820  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a scratch register for the IB to write to. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed with a sentinel so we can tell if the IB executed. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Single SET_UCONFIG_REG packet writing 0xDEADBEEF to scratch. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* Wait (with timeout) for the fence emitted with the IB. */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
	}
	r = 0;
	/* Fence signaled; poll until the register write is visible. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3883
3884 /*
3885  * CP.
3886  * On CIK, gfx and compute now have independent command processors.
3887  *
3888  * GFX
3889  * Gfx consists of a single ring and can process both gfx jobs and
3890  * compute jobs.  The gfx CP consists of three microengines (ME):
3891  * PFP - Pre-Fetch Parser
3892  * ME - Micro Engine
3893  * CE - Constant Engine
3894  * The PFP and ME make up what is considered the Drawing Engine (DE).
3895  * The CE is an asynchronous engine used for updating buffer descriptors
3896  * used by the DE so that they can be loaded into cache in parallel
3897  * while the DE is processing state update packets.
3898  *
3899  * Compute
3900  * The compute CP consists of two microengines (ME):
3901  * MEC1 - Compute MicroEngine 1
3902  * MEC2 - Compute MicroEngine 2
3903  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3904  * The queues are exposed to userspace and are programmed directly
3905  * by the compute runtime.
3906  */
3907 /**
3908  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3909  *
3910  * @rdev: radeon_device pointer
3911  * @enable: enable or disable the MEs
3912  *
3913  * Halts or unhalts the gfx MEs.
3914  */
3915 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3916 {
3917         if (enable)
3918                 WREG32(CP_ME_CNTL, 0);
3919         else {
3920                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3921                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3922                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3923                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3924         }
3925         udelay(50);
3926 }
3927
3928 /**
3929  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3930  *
3931  * @rdev: radeon_device pointer
3932  *
3933  * Loads the gfx PFP, ME, and CE ucode.
3934  * Returns 0 for success, -EINVAL if the ucode is not available.
3935  */
3936 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3937 {
3938         int i;
3939
3940         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3941                 return -EINVAL;
3942
3943         cik_cp_gfx_enable(rdev, false);
3944
3945         if (rdev->new_fw) {
3946                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3947                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3948                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3949                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3950                 const struct gfx_firmware_header_v1_0 *me_hdr =
3951                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3952                 const __le32 *fw_data;
3953                 u32 fw_size;
3954
3955                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3956                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3957                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3958
3959                 /* PFP */
3960                 fw_data = (const __le32 *)
3961                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3962                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3963                 WREG32(CP_PFP_UCODE_ADDR, 0);
3964                 for (i = 0; i < fw_size; i++)
3965                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3966                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3967
3968                 /* CE */
3969                 fw_data = (const __le32 *)
3970                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3971                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3972                 WREG32(CP_CE_UCODE_ADDR, 0);
3973                 for (i = 0; i < fw_size; i++)
3974                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3975                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3976
3977                 /* ME */
3978                 fw_data = (const __be32 *)
3979                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3980                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3981                 WREG32(CP_ME_RAM_WADDR, 0);
3982                 for (i = 0; i < fw_size; i++)
3983                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3984                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3986         } else {
3987                 const __be32 *fw_data;
3988
3989                 /* PFP */
3990                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3991                 WREG32(CP_PFP_UCODE_ADDR, 0);
3992                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3993                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3994                 WREG32(CP_PFP_UCODE_ADDR, 0);
3995
3996                 /* CE */
3997                 fw_data = (const __be32 *)rdev->ce_fw->data;
3998                 WREG32(CP_CE_UCODE_ADDR, 0);
3999                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4000                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4001                 WREG32(CP_CE_UCODE_ADDR, 0);
4002
4003                 /* ME */
4004                 fw_data = (const __be32 *)rdev->me_fw->data;
4005                 WREG32(CP_ME_RAM_WADDR, 0);
4006                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4007                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4008                 WREG32(CP_ME_RAM_WADDR, 0);
4009         }
4010
4011         return 0;
4012 }
4013
4014 /**
4015  * cik_cp_gfx_start - start the gfx ring
4016  *
4017  * @rdev: radeon_device pointer
4018  *
4019  * Enables the ring and loads the clear state context and other
4020  * packets required to init the ring.
4021  * Returns 0 for success, error for failure.
4022  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* Reserve space for the clear-state stream plus 17 fixed dwords
	 * of setup packets emitted below.
	 */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the default state table (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4074
4075 /**
4076  * cik_cp_gfx_fini - stop the gfx ring
4077  *
4078  * @rdev: radeon_device pointer
4079  *
4080  * Stop the gfx ring and tear down the driver ring
4081  * info.
4082  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs, then release the ring buffer */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4088
4089 /**
4090  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4091  *
4092  * @rdev: radeon_device pointer
4093  *
4094  * Program the location and size of the gfx ring buffer
4095  * and test it to make sure it's working.
4096  * Returns 0 for success, error for failure.
4097  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, disable rptr updates to the wb buffer */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL write also clears RB_RPTR_WR_ENA set above */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* with the CP up, TTM may use all of VRAM for moves again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4164
4165 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4166                      struct radeon_ring *ring)
4167 {
4168         u32 rptr;
4169
4170         if (rdev->wb.enabled)
4171                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4172         else
4173                 rptr = RREG32(CP_RB0_RPTR);
4174
4175         return rptr;
4176 }
4177
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	/* gfx ring 0 write pointer lives in CP_RB0_WPTR */
	return RREG32(CP_RB0_WPTR);
}
4183
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the write before the CP fetches it */
	(void)RREG32(CP_RB0_WPTR);
}
4190
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* fast path: read the writeback shadow */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* CP_HQD_* registers are banked per me/pipe/queue, so we
		 * must select this ring's queue via SRBM (under the mutex)
		 * before reading, then restore the default selection.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4208
4209 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4210                          struct radeon_ring *ring)
4211 {
4212         u32 wptr;
4213
4214         if (rdev->wb.enabled) {
4215                 /* XXX check if swapping is necessary on BE */
4216                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4217         } else {
4218                 mutex_lock(&rdev->srbm_mutex);
4219                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4220                 wptr = RREG32(CP_HQD_PQ_WPTR);
4221                 cik_srbm_select(rdev, 0, 0, 0, 0);
4222                 mutex_unlock(&rdev->srbm_mutex);
4223         }
4224
4225         return wptr;
4226 }
4227
4228 void cik_compute_set_wptr(struct radeon_device *rdev,
4229                           struct radeon_ring *ring)
4230 {
4231         /* XXX check if swapping is necessary on BE */
4232         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4233         WDOORBELL32(ring->doorbell_index, ring->wptr);
4234 }
4235
/*
 * cik_compute_stop - drain and deactivate one compute HQD
 *
 * Disables write-pointer polling and requests a dequeue of the hardware
 * queue descriptor backing @ring, waiting up to rdev->usec_timeout for
 * the HQD to go inactive before clearing its pointers.
 *
 * NOTE(review): selects the queue via cik_srbm_select() without taking
 * srbm_mutex itself, so the caller must already hold rdev->srbm_mutex
 * (as cik_cp_compute_enable() does).
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                /* poll for the queue to drain; ~usec_timeout us worst case */
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore the default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4260
4261 /**
4262  * cik_cp_compute_enable - enable/disable the compute CP MEs
4263  *
4264  * @rdev: radeon_device pointer
4265  * @enable: enable or disable the MEs
4266  *
4267  * Halts or unhalts the compute MEs.
4268  */
4269 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4270 {
4271         if (enable)
4272                 WREG32(CP_MEC_CNTL, 0);
4273         else {
4274                 /*
4275                  * To make hibernation reliable we need to clear compute ring
4276                  * configuration before halting the compute ring.
4277                  */
4278                 mutex_lock(&rdev->srbm_mutex);
4279                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4280                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4281                 mutex_unlock(&rdev->srbm_mutex);
4282
4283                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4284                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4285                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4286         }
4287         udelay(50);
4288 }
4289
4290 /**
4291  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4292  *
4293  * @rdev: radeon_device pointer
4294  *
4295  * Loads the compute MEC1&2 ucode.
4296  * Returns 0 for success, -EINVAL if the ucode is not available.
4297  */
4298 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4299 {
4300         int i;
4301
4302         if (!rdev->mec_fw)
4303                 return -EINVAL;
4304
4305         cik_cp_compute_enable(rdev, false);
4306
4307         if (rdev->new_fw) {
4308                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4309                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4310                 const __le32 *fw_data;
4311                 u32 fw_size;
4312
4313                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4314
4315                 /* MEC1 */
4316                 fw_data = (const __le32 *)
4317                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4318                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4319                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4320                 for (i = 0; i < fw_size; i++)
4321                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4322                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4323
4324                 /* MEC2 */
4325                 if (rdev->family == CHIP_KAVERI) {
4326                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4327                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4328
4329                         fw_data = (const __le32 *)
4330                                 (rdev->mec2_fw->data +
4331                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4332                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4333                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4334                         for (i = 0; i < fw_size; i++)
4335                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4336                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4337                 }
4338         } else {
4339                 const __be32 *fw_data;
4340
4341                 /* MEC1 */
4342                 fw_data = (const __be32 *)rdev->mec_fw->data;
4343                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4344                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4345                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4346                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4347
4348                 if (rdev->family == CHIP_KAVERI) {
4349                         /* MEC2 */
4350                         fw_data = (const __be32 *)rdev->mec_fw->data;
4351                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4352                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4353                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4354                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4355                 }
4356         }
4357
4358         return 0;
4359 }
4360
4361 /**
4362  * cik_cp_compute_start - start the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Enable the compute queues.
4367  * Returns 0 for success, error for failure.
4368  */
4369 static int cik_cp_compute_start(struct radeon_device *rdev)
4370 {
4371         cik_cp_compute_enable(rdev, true);
4372
4373         return 0;
4374 }
4375
4376 /**
4377  * cik_cp_compute_fini - stop the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Stop the compute queues and tear down the driver queue
4382  * info.
4383  */
4384 static void cik_cp_compute_fini(struct radeon_device *rdev)
4385 {
4386         int i, idx, r;
4387
4388         cik_cp_compute_enable(rdev, false);
4389
4390         for (i = 0; i < 2; i++) {
4391                 if (i == 0)
4392                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4393                 else
4394                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4395
4396                 if (rdev->ring[idx].mqd_obj) {
4397                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4398                         if (unlikely(r != 0))
4399                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4400
4401                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4402                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4403
4404                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4405                         rdev->ring[idx].mqd_obj = NULL;
4406                 }
4407         }
4408 }
4409
4410 static void cik_mec_fini(struct radeon_device *rdev)
4411 {
4412         int r;
4413
4414         if (rdev->mec.hpd_eop_obj) {
4415                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4416                 if (unlikely(r != 0))
4417                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4418                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4419                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4420
4421                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4422                 rdev->mec.hpd_eop_obj = NULL;
4423         }
4424 }
4425
4426 #define MEC_HPD_SIZE 2048
4427
/*
 * cik_mec_init - allocate and map the MEC HPD EOP buffer
 *
 * Allocates (lazily), pins, and zeroes the GTT buffer object that backs
 * the per-pipe HPD EOP storage used by the compute microengine.
 * Returns 0 on success; on any failure the partially set up state is
 * torn down via cik_mec_fini() and the error is returned.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
        int r;
        u32 *hpd;

        /*
         * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
         * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
         * Nonetheless, we assign only 1 pipe because all other pipes will
         * be handled by KFD
         */
        rdev->mec.num_mec = 1;
        rdev->mec.num_pipe = 1;
        rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

        if (rdev->mec.hpd_eop_obj == NULL) {
                /* 2 * MEC_HPD_SIZE bytes per pipe, page-aligned, in GTT */
                r = radeon_bo_create(rdev,
                                     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
                                     PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
                                     &rdev->mec.hpd_eop_obj);
                if (r) {
                        /* NOTE(review): "HDP" here looks like a typo for "HPD"
                         * (cik_mec_fini() says "HPD EOP") — left as-is since
                         * it is a runtime log string.
                         */
                        dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
                        return r;
                }
        }

        r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
        if (unlikely(r != 0)) {
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
                          &rdev->mec.hpd_eop_gpu_addr);
        if (r) {
                dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }
        r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
        if (r) {
                dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
                cik_mec_fini(rdev);
                return r;
        }

        /* clear memory.  Not sure if this is required or not */
        memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

        radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
        radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

        return 0;
}
4482
/*
 * CPU-side staging copy of the CP_HQD_* / CP_MQD_* register values for
 * one hardware queue descriptor.  cik_cp_compute_resume() fills these in
 * and writes them to the matching registers while the queue is selected.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4521
/*
 * Memory queue descriptor (MQD) layout for CIK compute queues.  One is
 * allocated per queue in a GTT BO by cik_cp_compute_resume() and its GPU
 * address is programmed into CP_MQD_BASE_ADDR so the CP can access it.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        /* register staging area; see struct hqd_registers */
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4549
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        /* EOP buffer allocated/pinned by cik_mec_init() */
        eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

        cik_srbm_select(rdev, 0, 0, 0, 0);

        /* write the EOP addr */
        WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
        WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

        /* set the VMID assigned */
        WREG32(CP_HPD_EOP_VMID, 0);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(CP_HPD_EOP_CONTROL);
        tmp &= ~EOP_SIZE_MASK;
        tmp |= order_base_2(MEC_HPD_SIZE / 8);
        WREG32(CP_HPD_EOP_CONTROL, tmp);

        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* MQD BO is created lazily; freed in cik_cp_compute_fini() */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                /* all HQD register writes below must target this queue */
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        /* wait up to usec_timeout for the HQD to drain */
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                /* sanity-check the queue; mark it unusable on failure */
                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4790
4791 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4792 {
4793         cik_cp_gfx_enable(rdev, enable);
4794         cik_cp_compute_enable(rdev, enable);
4795 }
4796
/* Load ucode for both CPs: gfx first, then the compute MECs.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4810
/* Tear down both command processors: gfx, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4816
4817 static int cik_cp_resume(struct radeon_device *rdev)
4818 {
4819         int r;
4820
4821         cik_enable_gui_idle_interrupt(rdev, false);
4822
4823         r = cik_cp_load_microcode(rdev);
4824         if (r)
4825                 return r;
4826
4827         r = cik_cp_gfx_resume(rdev);
4828         if (r)
4829                 return r;
4830         r = cik_cp_compute_resume(rdev);
4831         if (r)
4832                 return r;
4833
4834         cik_enable_gui_idle_interrupt(rdev, true);
4835
4836         return 0;
4837 }
4838
/*
 * cik_print_gpu_status_regs - dump GPU status registers to the kernel log
 *
 * Logs the GRBM/SRBM/SDMA/CP status registers; used by the soft-reset
 * path to aid hang diagnosis.  Purely informational, no side effects on
 * hardware state beyond the register reads.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4878
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        /* any busy gfx-pipeline block implies a gfx reset */
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & RLC_BUSY)
                reset_mask |= RADEON_RESET_RLC;

        /* SDMA0_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
        /* note: SDMA reports IDLE, not BUSY, hence the inverted test */
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* SDMA1_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & SDMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & SDMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
4959
4960 /**
4961  * cik_gpu_soft_reset - soft reset GPU
4962  *
4963  * @rdev: radeon_device pointer
4964  * @reset_mask: mask of which blocks to reset
4965  *
4966  * Soft reset the blocks specified in @reset_mask.
4967  */
4968 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4969 {
4970         struct evergreen_mc_save save;
4971         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972         u32 tmp;
4973
4974         if (reset_mask == 0)
4975                 return;
4976
4977         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4978
4979         cik_print_gpu_status_regs(rdev);
4980         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4981                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4982         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4983                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4984
4985         /* disable CG/PG */
4986         cik_fini_pg(rdev);
4987         cik_fini_cg(rdev);
4988
4989         /* stop the rlc */
4990         cik_rlc_stop(rdev);
4991
4992         /* Disable GFX parsing/prefetching */
4993         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4994
4995         /* Disable MEC parsing/prefetching */
4996         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4997
4998         if (reset_mask & RADEON_RESET_DMA) {
4999                 /* sdma0 */
5000                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001                 tmp |= SDMA_HALT;
5002                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003         }
5004         if (reset_mask & RADEON_RESET_DMA1) {
5005                 /* sdma1 */
5006                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5007                 tmp |= SDMA_HALT;
5008                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5009         }
5010
5011         evergreen_mc_stop(rdev, &save);
5012         if (evergreen_mc_wait_for_idle(rdev)) {
5013                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5014         }
5015
5016         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5017                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5018
5019         if (reset_mask & RADEON_RESET_CP) {
5020                 grbm_soft_reset |= SOFT_RESET_CP;
5021
5022                 srbm_soft_reset |= SOFT_RESET_GRBM;
5023         }
5024
5025         if (reset_mask & RADEON_RESET_DMA)
5026                 srbm_soft_reset |= SOFT_RESET_SDMA;
5027
5028         if (reset_mask & RADEON_RESET_DMA1)
5029                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5030
5031         if (reset_mask & RADEON_RESET_DISPLAY)
5032                 srbm_soft_reset |= SOFT_RESET_DC;
5033
5034         if (reset_mask & RADEON_RESET_RLC)
5035                 grbm_soft_reset |= SOFT_RESET_RLC;
5036
5037         if (reset_mask & RADEON_RESET_SEM)
5038                 srbm_soft_reset |= SOFT_RESET_SEM;
5039
5040         if (reset_mask & RADEON_RESET_IH)
5041                 srbm_soft_reset |= SOFT_RESET_IH;
5042
5043         if (reset_mask & RADEON_RESET_GRBM)
5044                 srbm_soft_reset |= SOFT_RESET_GRBM;
5045
5046         if (reset_mask & RADEON_RESET_VMC)
5047                 srbm_soft_reset |= SOFT_RESET_VMC;
5048
5049         if (!(rdev->flags & RADEON_IS_IGP)) {
5050                 if (reset_mask & RADEON_RESET_MC)
5051                         srbm_soft_reset |= SOFT_RESET_MC;
5052         }
5053
5054         if (grbm_soft_reset) {
5055                 tmp = RREG32(GRBM_SOFT_RESET);
5056                 tmp |= grbm_soft_reset;
5057                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5058                 WREG32(GRBM_SOFT_RESET, tmp);
5059                 tmp = RREG32(GRBM_SOFT_RESET);
5060
5061                 udelay(50);
5062
5063                 tmp &= ~grbm_soft_reset;
5064                 WREG32(GRBM_SOFT_RESET, tmp);
5065                 tmp = RREG32(GRBM_SOFT_RESET);
5066         }
5067
5068         if (srbm_soft_reset) {
5069                 tmp = RREG32(SRBM_SOFT_RESET);
5070                 tmp |= srbm_soft_reset;
5071                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5072                 WREG32(SRBM_SOFT_RESET, tmp);
5073                 tmp = RREG32(SRBM_SOFT_RESET);
5074
5075                 udelay(50);
5076
5077                 tmp &= ~srbm_soft_reset;
5078                 WREG32(SRBM_SOFT_RESET, tmp);
5079                 tmp = RREG32(SRBM_SOFT_RESET);
5080         }
5081
5082         /* Wait a little for things to settle down */
5083         udelay(50);
5084
5085         evergreen_mc_resume(rdev, &save);
5086         udelay(50);
5087
5088         cik_print_gpu_status_regs(rdev);
5089 }
5090
/* GMCON register values saved across a KV (IGP) pci config reset;
 * filled in by kv_save_regs_for_reset() and written back by
 * kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
        u32 gmcon_reng_execute; /* saved GMCON_RENG_EXECUTE */
        u32 gmcon_misc;         /* saved GMCON_MISC */
        u32 gmcon_misc3;        /* saved GMCON_MISC3 */
};
5096
/**
 * kv_save_regs_for_reset - save GMCON registers before a reset (KV/IGP)
 *
 * @rdev: radeon_device pointer
 * @save: buffer that receives the current GMCON register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then clears the
 * render-engine execute-on-power-up / execute-on-register-update and
 * stutter-enable bits — presumably to keep the MC state machine idle
 * across the pci config reset.  The saved values are written back by
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
                                   struct kv_reset_save_regs *save)
{
        save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
        save->gmcon_misc = RREG32(GMCON_MISC);
        save->gmcon_misc3 = RREG32(GMCON_MISC3);

        /* disable the execute/stutter bits while keeping the rest intact */
        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
        WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
                                                STCTRL_STUTTER_EN));
}
5108
5109 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5110                                       struct kv_reset_save_regs *save)
5111 {
5112         int i;
5113
5114         WREG32(GMCON_PGFSM_WRITE, 0);
5115         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5116
5117         for (i = 0; i < 5; i++)
5118                 WREG32(GMCON_PGFSM_WRITE, 0);
5119
5120         WREG32(GMCON_PGFSM_WRITE, 0);
5121         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5122
5123         for (i = 0; i < 5; i++)
5124                 WREG32(GMCON_PGFSM_WRITE, 0);
5125
5126         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5127         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5128
5129         for (i = 0; i < 5; i++)
5130                 WREG32(GMCON_PGFSM_WRITE, 0);
5131
5132         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5133         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5134
5135         for (i = 0; i < 5; i++)
5136                 WREG32(GMCON_PGFSM_WRITE, 0);
5137
5138         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5139         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5140
5141         for (i = 0; i < 5; i++)
5142                 WREG32(GMCON_PGFSM_WRITE, 0);
5143
5144         WREG32(GMCON_PGFSM_WRITE, 0);
5145         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5146
5147         for (i = 0; i < 5; i++)
5148                 WREG32(GMCON_PGFSM_WRITE, 0);
5149
5150         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5151         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5152
5153         for (i = 0; i < 5; i++)
5154                 WREG32(GMCON_PGFSM_WRITE, 0);
5155
5156         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5157         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5158
5159         for (i = 0; i < 5; i++)
5160                 WREG32(GMCON_PGFSM_WRITE, 0);
5161
5162         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5163         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5164
5165         for (i = 0; i < 5; i++)
5166                 WREG32(GMCON_PGFSM_WRITE, 0);
5167
5168         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5169         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5170
5171         for (i = 0; i < 5; i++)
5172                 WREG32(GMCON_PGFSM_WRITE, 0);
5173
5174         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5175         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5176
5177         WREG32(GMCON_MISC3, save->gmcon_misc3);
5178         WREG32(GMCON_MISC, save->gmcon_misc);
5179         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5180 }
5181
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt all engines (CP, MEC, both SDMAs, RLC), stop MC access, save the
 * GMCON registers on IGPs, disable bus mastering, then reset the whole
 * asic through pci config space.  Waits for the asic to come back out of
 * reset (CONFIG_MEMSIZE reads 0xffffffff while in reset) and finally
 * restores the GMCON registers on IGPs.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        struct kv_reset_save_regs kv_save = { 0 };
        u32 tmp, i;

        dev_info(rdev->dev, "GPU pci config reset\n");

        /* disable dpm? */

        /* disable cg/pg */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        /* sdma0 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        /* sdma1 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* XXX other engines? */

        /* halt the rlc, disable cp internal ints */
        cik_rlc_stop(rdev);

        udelay(50);

        /* disable mem access */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
        }

        /* IGPs need the GMCON state preserved across the reset */
        if (rdev->flags & RADEON_IS_IGP)
                kv_save_regs_for_reset(rdev, &kv_save);

        /* disable BM */
        pci_clear_master(rdev->pdev);
        /* reset */
        radeon_pci_config_reset(rdev);

        udelay(100);

        /* wait for asic to come out of reset */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
                        break;
                udelay(1);
        }

        /* does asic init need to be run first??? */
        if (rdev->flags & RADEON_IS_IGP)
                kv_restore_regs_for_reset(rdev, &kv_save);
}
5244
5245 /**
5246  * cik_asic_reset - soft reset GPU
5247  *
5248  * @rdev: radeon_device pointer
5249  * @hard: force hard reset
5250  *
5251  * Look up which blocks are hung and attempt
5252  * to reset them.
5253  * Returns 0 for success.
5254  */
5255 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5256 {
5257         u32 reset_mask;
5258
5259         if (hard) {
5260                 cik_gpu_pci_config_reset(rdev);
5261                 return 0;
5262         }
5263
5264         reset_mask = cik_gpu_check_soft_reset(rdev);
5265
5266         if (reset_mask)
5267                 r600_set_bios_scratch_engine_hung(rdev, true);
5268
5269         /* try soft reset */
5270         cik_gpu_soft_reset(rdev, reset_mask);
5271
5272         reset_mask = cik_gpu_check_soft_reset(rdev);
5273
5274         /* try pci config reset */
5275         if (reset_mask && radeon_hard_reset)
5276                 cik_gpu_pci_config_reset(rdev);
5277
5278         reset_mask = cik_gpu_check_soft_reset(rdev);
5279
5280         if (!reset_mask)
5281                 r600_set_bios_scratch_engine_hung(rdev, false);
5282
5283         return 0;
5284 }
5285
5286 /**
5287  * cik_gfx_is_lockup - check if the 3D engine is locked up
5288  *
5289  * @rdev: radeon_device pointer
5290  * @ring: radeon_ring structure holding ring information
5291  *
5292  * Check if the 3D engine is locked up (CIK).
5293  * Returns true if the engine is locked, false if not.
5294  */
5295 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5296 {
5297         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5298
5299         if (!(reset_mask & (RADEON_RESET_GFX |
5300                             RADEON_RESET_COMPUTE |
5301                             RADEON_RESET_CP))) {
5302                 radeon_ring_lockup_update(rdev, ring);
5303                 return false;
5304         }
5305         return radeon_ring_test_lockup(rdev, ring);
5306 }
5307
5308 /* MC */
5309 /**
5310  * cik_mc_program - program the GPU memory controller
5311  *
5312  * @rdev: radeon_device pointer
5313  *
5314  * Set the location of vram, gart, and AGP in the GPU's
5315  * physical address space (CIK).
5316  */
5317 static void cik_mc_program(struct radeon_device *rdev)
5318 {
5319         struct evergreen_mc_save save;
5320         u32 tmp;
5321         int i, j;
5322
5323         /* Initialize HDP */
5324         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5325                 WREG32((0x2c14 + j), 0x00000000);
5326                 WREG32((0x2c18 + j), 0x00000000);
5327                 WREG32((0x2c1c + j), 0x00000000);
5328                 WREG32((0x2c20 + j), 0x00000000);
5329                 WREG32((0x2c24 + j), 0x00000000);
5330         }
5331         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5332
5333         evergreen_mc_stop(rdev, &save);
5334         if (radeon_mc_wait_for_idle(rdev)) {
5335                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5336         }
5337         /* Lockout access through VGA aperture*/
5338         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5339         /* Update configuration */
5340         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5341                rdev->mc.vram_start >> 12);
5342         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5343                rdev->mc.vram_end >> 12);
5344         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5345                rdev->vram_scratch.gpu_addr >> 12);
5346         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5347         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5348         WREG32(MC_VM_FB_LOCATION, tmp);
5349         /* XXX double check these! */
5350         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5351         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5352         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5353         WREG32(MC_VM_AGP_BASE, 0);
5354         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5355         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5356         if (radeon_mc_wait_for_idle(rdev)) {
5357                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5358         }
5359         evergreen_mc_resume(rdev, &save);
5360         /* we need to own VRAM, so turn off the VGA renderer here
5361          * to stop it overwriting our objects */
5362         rv515_vga_render_disable(rdev);
5363 }
5364
5365 /**
5366  * cik_mc_init - initialize the memory controller driver params
5367  *
5368  * @rdev: radeon_device pointer
5369  *
5370  * Look up the amount of vram, vram width, and decide how to place
5371  * vram and gart within the GPU's physical address space (CIK).
5372  * Returns 0 for success.
5373  */
5374 static int cik_mc_init(struct radeon_device *rdev)
5375 {
5376         u32 tmp;
5377         int chansize, numchan;
5378
5379         /* Get VRAM informations */
5380         rdev->mc.vram_is_ddr = true;
5381         tmp = RREG32(MC_ARB_RAMCFG);
5382         if (tmp & CHANSIZE_MASK) {
5383                 chansize = 64;
5384         } else {
5385                 chansize = 32;
5386         }
5387         tmp = RREG32(MC_SHARED_CHMAP);
5388         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5389         case 0:
5390         default:
5391                 numchan = 1;
5392                 break;
5393         case 1:
5394                 numchan = 2;
5395                 break;
5396         case 2:
5397                 numchan = 4;
5398                 break;
5399         case 3:
5400                 numchan = 8;
5401                 break;
5402         case 4:
5403                 numchan = 3;
5404                 break;
5405         case 5:
5406                 numchan = 6;
5407                 break;
5408         case 6:
5409                 numchan = 10;
5410                 break;
5411         case 7:
5412                 numchan = 12;
5413                 break;
5414         case 8:
5415                 numchan = 16;
5416                 break;
5417         }
5418         rdev->mc.vram_width = numchan * chansize;
5419         /* Could aper size report 0 ? */
5420         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5421         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5422         /* size in MB on si */
5423         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5424         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5426         si_vram_gtt_location(rdev, &rdev->mc);
5427         radeon_update_bandwidth_info(rdev);
5428
5429         return 0;
5430 }
5431
5432 /*
5433  * GART
5434  * VMID 0 is the physical GPU addresses as used by the kernel.
5435  * VMIDs 1-15 are used for userspace clients and are handled
5436  * by the radeon vm/hsa code.
5437  */
5438 /**
5439  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5440  *
5441  * @rdev: radeon_device pointer
5442  *
5443  * Flush the TLB for the VMID 0 page table (CIK).
5444  */
5445 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5446 {
5447         /* flush hdp cache */
5448         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5449
5450         /* bits 0-15 are the VM contexts0-15 */
5451         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5452 }
5453
/**
 * cik_pcie_init_compute_vmid - program SH_MEM registers for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs SH_MEM_CONFIG/APE1/BASES for VMIDs 8-15 (the VMIDs handed to
 * amdkfd, see cik_vm_init()).  Register access is steered per-VMID via
 * the SRBM, so srbm_mutex is held around the loop and VMID 0 is
 * re-selected before returning.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
        int i;
        uint32_t sh_mem_bases, sh_mem_config;

        /* 0x6000 in both halves — presumably the shared/private aperture
         * bases expected by the amdkfd address map; TODO confirm */
        sh_mem_bases = 0x6000 | 0x6000 << 16;
        sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
        sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

        mutex_lock(&rdev->srbm_mutex);
        for (i = 8; i < 16; i++) {
                cik_srbm_select(rdev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(SH_MEM_CONFIG, sh_mem_config);
                /* APE1 base > limit disables the APE1 aperture */
                WREG32(SH_MEM_APE1_BASE, 1);
                WREG32(SH_MEM_APE1_LIMIT, 0);
                WREG32(SH_MEM_BASES, sh_mem_bases);
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);
}
5475
5476 /**
5477  * cik_pcie_gart_enable - gart enable
5478  *
5479  * @rdev: radeon_device pointer
5480  *
5481  * This sets up the TLBs, programs the page tables for VMID0,
5482  * sets up the hw for VMIDs 1-15 which are allocated on
5483  * demand, and sets up the global locations for the LDS, GDS,
5484  * and GPUVM for FSA64 clients (CIK).
5485  * Returns 0 for success, errors for failure.
5486  */
5487 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5488 {
5489         int r, i;
5490
5491         if (rdev->gart.robj == NULL) {
5492                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5493                 return -EINVAL;
5494         }
5495         r = radeon_gart_table_vram_pin(rdev);
5496         if (r)
5497                 return r;
5498         /* Setup TLB control */
5499         WREG32(MC_VM_MX_L1_TLB_CNTL,
5500                (0xA << 7) |
5501                ENABLE_L1_TLB |
5502                ENABLE_L1_FRAGMENT_PROCESSING |
5503                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5504                ENABLE_ADVANCED_DRIVER_MODEL |
5505                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5506         /* Setup L2 cache */
5507         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5508                ENABLE_L2_FRAGMENT_PROCESSING |
5509                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5510                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5511                EFFECTIVE_L2_QUEUE_SIZE(7) |
5512                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5513         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5514         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5515                BANK_SELECT(4) |
5516                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5517         /* setup context0 */
5518         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5519         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5520         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5521         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5522                         (u32)(rdev->dummy_page.addr >> 12));
5523         WREG32(VM_CONTEXT0_CNTL2, 0);
5524         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5525                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5526
5527         WREG32(0x15D4, 0);
5528         WREG32(0x15D8, 0);
5529         WREG32(0x15DC, 0);
5530
5531         /* restore context1-15 */
5532         /* set vm size, must be a multiple of 4 */
5533         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5534         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5535         for (i = 1; i < 16; i++) {
5536                 if (i < 8)
5537                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5538                                rdev->vm_manager.saved_table_addr[i]);
5539                 else
5540                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5541                                rdev->vm_manager.saved_table_addr[i]);
5542         }
5543
5544         /* enable context1-15 */
5545         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5546                (u32)(rdev->dummy_page.addr >> 12));
5547         WREG32(VM_CONTEXT1_CNTL2, 4);
5548         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5549                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5550                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5551                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5552                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5553                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5554                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5555                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5556                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5557                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5558                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5559                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5560                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5561                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5562
5563         if (rdev->family == CHIP_KAVERI) {
5564                 u32 tmp = RREG32(CHUB_CONTROL);
5565                 tmp &= ~BYPASS_VM;
5566                 WREG32(CHUB_CONTROL, tmp);
5567         }
5568
5569         /* XXX SH_MEM regs */
5570         /* where to put LDS, scratch, GPUVM in FSA64 space */
5571         mutex_lock(&rdev->srbm_mutex);
5572         for (i = 0; i < 16; i++) {
5573                 cik_srbm_select(rdev, 0, 0, 0, i);
5574                 /* CP and shaders */
5575                 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5576                 WREG32(SH_MEM_APE1_BASE, 1);
5577                 WREG32(SH_MEM_APE1_LIMIT, 0);
5578                 WREG32(SH_MEM_BASES, 0);
5579                 /* SDMA GFX */
5580                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5581                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5582                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5583                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5584                 /* XXX SDMA RLC - todo */
5585         }
5586         cik_srbm_select(rdev, 0, 0, 0, 0);
5587         mutex_unlock(&rdev->srbm_mutex);
5588
5589         cik_pcie_init_compute_vmid(rdev);
5590
5591         cik_pcie_gart_tlb_flush(rdev);
5592         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5593                  (unsigned)(rdev->mc.gtt_size >> 20),
5594                  (unsigned long long)rdev->gart.table_addr);
5595         rdev->gart.ready = true;
5596         return 0;
5597 }
5598
5599 /**
5600  * cik_pcie_gart_disable - gart disable
5601  *
5602  * @rdev: radeon_device pointer
5603  *
5604  * This disables all VM page table (CIK).
5605  */
5606 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5607 {
5608         unsigned i;
5609
5610         for (i = 1; i < 16; ++i) {
5611                 uint32_t reg;
5612                 if (i < 8)
5613                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5614                 else
5615                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5616                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5617         }
5618
5619         /* Disable all tables */
5620         WREG32(VM_CONTEXT0_CNTL, 0);
5621         WREG32(VM_CONTEXT1_CNTL, 0);
5622         /* Setup TLB control */
5623         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5624                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5625         /* Setup L2 cache */
5626         WREG32(VM_L2_CNTL,
5627                ENABLE_L2_FRAGMENT_PROCESSING |
5628                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5629                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5630                EFFECTIVE_L2_QUEUE_SIZE(7) |
5631                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5632         WREG32(VM_L2_CNTL2, 0);
5633         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5634                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5635         radeon_gart_table_vram_unpin(rdev);
5636 }
5637
5638 /**
5639  * cik_pcie_gart_fini - vm fini callback
5640  *
5641  * @rdev: radeon_device pointer
5642  *
5643  * Tears down the driver GART/VM setup (CIK).
5644  */
5645 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5646 {
5647         cik_pcie_gart_disable(rdev);
5648         radeon_gart_table_vram_free(rdev);
5649         radeon_gart_fini(rdev);
5650 }
5651
5652 /* vm parser */
5653 /**
5654  * cik_ib_parse - vm ib_parse callback
5655  *
5656  * @rdev: radeon_device pointer
5657  * @ib: indirect buffer pointer
5658  *
5659  * CIK uses hw IB checking so this is a nop (CIK).
5660  */
5661 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5662 {
5663         return 0;
5664 }
5665
5666 /*
5667  * vm
5668  * VMID 0 is the physical GPU addresses as used by the kernel.
5669  * VMIDs 1-15 are used for userspace clients and are handled
5670  * by the radeon vm/hsa code.
5671  */
5672 /**
5673  * cik_vm_init - cik vm init callback
5674  *
5675  * @rdev: radeon_device pointer
5676  *
5677  * Inits cik specific vm parameters (number of VMs, base of vram for
5678  * VMIDs 1-15) (CIK).
5679  * Returns 0 for success.
5680  */
5681 int cik_vm_init(struct radeon_device *rdev)
5682 {
5683         /*
5684          * number of VMs
5685          * VMID 0 is reserved for System
5686          * radeon graphics/compute will use VMIDs 1-7
5687          * amdkfd will use VMIDs 8-15
5688          */
5689         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5690         /* base offset of vram pages */
5691         if (rdev->flags & RADEON_IS_IGP) {
5692                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5693                 tmp <<= 22;
5694                 rdev->vm_manager.vram_base_offset = tmp;
5695         } else
5696                 rdev->vm_manager.vram_base_offset = 0;
5697
5698         return 0;
5699 }
5700
5701 /**
5702  * cik_vm_fini - cik vm fini callback
5703  *
5704  * @rdev: radeon_device pointer
5705  *
5706  * Tear down any asic specific VM setup (CIK).
5707  */
5708 void cik_vm_fini(struct radeon_device *rdev)
5709 {
5710 }
5711
5712 /**
5713  * cik_vm_decode_fault - print human readable fault info
5714  *
5715  * @rdev: radeon_device pointer
5716  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5717  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5718  *
5719  * Print human readable fault information (CIK).
5720  */
5721 static void cik_vm_decode_fault(struct radeon_device *rdev,
5722                                 u32 status, u32 addr, u32 mc_client)
5723 {
5724         u32 mc_id;
5725         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5726         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5727         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5728                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5729
5730         if (rdev->family == CHIP_HAWAII)
5731                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5732         else
5733                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5734
5735         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5736                protections, vmid, addr,
5737                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5738                block, mc_client, mc_id);
5739 }
5740
5741 /**
5742  * cik_vm_flush - cik vm flush using the CP
5743  *
5744  * @rdev: radeon_device pointer
5745  *
5746  * Update the page table base and flush the VM TLB
5747  * using the CP (CIK).
5748  */
5749 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5750                   unsigned vm_id, uint64_t pd_addr)
5751 {
5752         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5753
5754         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5755         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5756                                  WRITE_DATA_DST_SEL(0)));
5757         if (vm_id < 8) {
5758                 radeon_ring_write(ring,
5759                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5760         } else {
5761                 radeon_ring_write(ring,
5762                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5763         }
5764         radeon_ring_write(ring, 0);
5765         radeon_ring_write(ring, pd_addr >> 12);
5766
5767         /* update SH_MEM_* regs */
5768         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5769         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5770                                  WRITE_DATA_DST_SEL(0)));
5771         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5772         radeon_ring_write(ring, 0);
5773         radeon_ring_write(ring, VMID(vm_id));
5774
5775         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5776         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5777                                  WRITE_DATA_DST_SEL(0)));
5778         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5779         radeon_ring_write(ring, 0);
5780
5781         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5782         radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5783         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5784         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5785
5786         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5787         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5788                                  WRITE_DATA_DST_SEL(0)));
5789         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5790         radeon_ring_write(ring, 0);
5791         radeon_ring_write(ring, VMID(0));
5792
5793         /* HDP flush */
5794         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5795
5796         /* bits 0-15 are the VM contexts0-15 */
5797         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5798         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5799                                  WRITE_DATA_DST_SEL(0)));
5800         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5801         radeon_ring_write(ring, 0);
5802         radeon_ring_write(ring, 1 << vm_id);
5803
5804         /* wait for the invalidate to complete */
5805         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5806         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5807                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5808                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5809         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5810         radeon_ring_write(ring, 0);
5811         radeon_ring_write(ring, 0); /* ref */
5812         radeon_ring_write(ring, 0); /* mask */
5813         radeon_ring_write(ring, 0x20); /* poll interval */
5814
5815         /* compute doesn't have PFP */
5816         if (usepfp) {
5817                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5818                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5819                 radeon_ring_write(ring, 0x0);
5820         }
5821 }
5822
5823 /*
5824  * RLC
5825  * The RLC is a multi-purpose microengine that handles a
5826  * variety of functions, the most important of which is
5827  * the interrupt controller.
5828  */
5829 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5830                                           bool enable)
5831 {
5832         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5833
5834         if (enable)
5835                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5836         else
5837                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5838         WREG32(CP_INT_CNTL_RING0, tmp);
5839 }
5840
5841 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5842 {
5843         u32 tmp;
5844
5845         tmp = RREG32(RLC_LB_CNTL);
5846         if (enable)
5847                 tmp |= LOAD_BALANCE_ENABLE;
5848         else
5849                 tmp &= ~LOAD_BALANCE_ENABLE;
5850         WREG32(RLC_LB_CNTL, tmp);
5851 }
5852
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes units to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls the per-CU serdes master busy status for every SE/SH
 * combination, then the non-CU (SE/GC/TC0/TC1) master busy bits.
 * Each poll loop gives up after rdev->usec_timeout iterations of
 * udelay(1) rather than blocking forever.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        u32 i, j, k;
        u32 mask;

        /* poll each shader engine / shader array individually */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast addressing before dropping the mutex */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* now wait for the non-CU masters to go idle as well */
        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
5879
5880 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5881 {
5882         u32 tmp;
5883
5884         tmp = RREG32(RLC_CNTL);
5885         if (tmp != rlc)
5886                 WREG32(RLC_CNTL, rlc);
5887 }
5888
/**
 * cik_halt_rlc - halt the RLC if it is currently running
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE (when set), waits for the RLC GPM to go idle and
 * for the serdes units to settle.
 * Returns the original RLC_CNTL value so the caller can later restore
 * it with cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                u32 i;

                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                /* wait for the GPM microengine to go idle (bounded poll) */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
                                break;
                        udelay(1);
                }

                cik_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
5912
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode and wait for it
 *
 * @rdev: radeon_device pointer
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE to the RLC, then waits first for the
 * GFX power/clock status bits to assert and then for the RLC to
 * acknowledge the request (REQ bit cleared).  Each wait is bounded by
 * rdev->usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        /* wait for GFX power and clocks to be reported up */
        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* wait for the RLC to ack the request (REQ cleared) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
5933
5934 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5935 {
5936         u32 tmp;
5937
5938         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5939         WREG32(RLC_GPR_REG2, tmp);
5940 }
5941
5942 /**
5943  * cik_rlc_stop - stop the RLC ME
5944  *
5945  * @rdev: radeon_device pointer
5946  *
5947  * Halt the RLC ME (MicroEngine) (CIK).
5948  */
5949 static void cik_rlc_stop(struct radeon_device *rdev)
5950 {
5951         WREG32(RLC_CNTL, 0);
5952
5953         cik_enable_gui_idle_interrupt(rdev, false);
5954
5955         cik_wait_for_rlc_serdes(rdev);
5956 }
5957
5958 /**
5959  * cik_rlc_start - start the RLC ME
5960  *
5961  * @rdev: radeon_device pointer
5962  *
5963  * Unhalt the RLC ME (MicroEngine) (CIK).
5964  */
5965 static void cik_rlc_start(struct radeon_device *rdev)
5966 {
5967         WREG32(RLC_CNTL, RLC_ENABLE);
5968
5969         cik_enable_gui_idle_interrupt(rdev, true);
5970
5971         udelay(50);
5972 }
5973
5974 /**
5975  * cik_rlc_resume - setup the RLC hw
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Initialize the RLC registers, load the ucode,
5980  * and start the RLC (CIK).
5981  * Returns 0 for success, -EINVAL if the ucode is not available.
5982  */
5983 static int cik_rlc_resume(struct radeon_device *rdev)
5984 {
5985         u32 i, size, tmp;
5986
5987         if (!rdev->rlc_fw)
5988                 return -EINVAL;
5989
5990         cik_rlc_stop(rdev);
5991
5992         /* disable CG */
5993         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5994         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5995
5996         si_rlc_reset(rdev);
5997
5998         cik_init_pg(rdev);
5999
6000         cik_init_cg(rdev);
6001
6002         WREG32(RLC_LB_CNTR_INIT, 0);
6003         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6004
6005         mutex_lock(&rdev->grbm_idx_mutex);
6006         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6007         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6008         WREG32(RLC_LB_PARAMS, 0x00600408);
6009         WREG32(RLC_LB_CNTL, 0x80000004);
6010         mutex_unlock(&rdev->grbm_idx_mutex);
6011
6012         WREG32(RLC_MC_CNTL, 0);
6013         WREG32(RLC_UCODE_CNTL, 0);
6014
6015         if (rdev->new_fw) {
6016                 const struct rlc_firmware_header_v1_0 *hdr =
6017                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6018                 const __le32 *fw_data = (const __le32 *)
6019                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6020
6021                 radeon_ucode_print_rlc_hdr(&hdr->header);
6022
6023                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6024                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6025                 for (i = 0; i < size; i++)
6026                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6027                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6028         } else {
6029                 const __be32 *fw_data;
6030
6031                 switch (rdev->family) {
6032                 case CHIP_BONAIRE:
6033                 case CHIP_HAWAII:
6034                 default:
6035                         size = BONAIRE_RLC_UCODE_SIZE;
6036                         break;
6037                 case CHIP_KAVERI:
6038                         size = KV_RLC_UCODE_SIZE;
6039                         break;
6040                 case CHIP_KABINI:
6041                         size = KB_RLC_UCODE_SIZE;
6042                         break;
6043                 case CHIP_MULLINS:
6044                         size = ML_RLC_UCODE_SIZE;
6045                         break;
6046                 }
6047
6048                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6049                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6050                 for (i = 0; i < size; i++)
6051                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6052                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6053         }
6054
6055         /* XXX - find out what chips support lbpw */
6056         cik_enable_lbpw(rdev, false);
6057
6058         if (rdev->family == CHIP_BONAIRE)
6059                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6060
6061         cik_rlc_start(rdev);
6062
6063         return 0;
6064 }
6065
/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating (CGCG)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG (honored only if the cg_flags allow it)
 *
 * Programs RLC_CGCG_CGLS_CTRL via a halt-RLC / serdes-write / resume-RLC
 * sequence when enabling, or clears the enable bits when disabling.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                /* RLC must be halted while the serdes masks are written */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* repeated reads; presumably flushing/settling the CB
                 * gater clock - TODO confirm against hw docs */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6103
/**
 * cik_enable_mgcg - enable/disable medium grain clock gating (MGCG)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG (each sub-feature is additionally
 *          gated by the corresponding rdev->cg_flags bit)
 *
 * Enabling sets up CP memory light sleep, the MGCG override, the RLC
 * serdes masks (with the RLC halted), and optionally CGTS.  Disabling
 * reverses those settings.  Statement order follows the required hw
 * programming sequence.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* RLC must be halted while the serdes masks are written */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* coarse grain texture sampler clock gating */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force both override bits on to disable MGCG */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* turn off RLC memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* turn off CP memory light sleep */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* RLC must be halted while the serdes masks are written */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6187
/* MC/VM clock gating registers toggled by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6200
6201 static void cik_enable_mc_ls(struct radeon_device *rdev,
6202                              bool enable)
6203 {
6204         int i;
6205         u32 orig, data;
6206
6207         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6208                 orig = data = RREG32(mc_cg_registers[i]);
6209                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6210                         data |= MC_LS_ENABLE;
6211                 else
6212                         data &= ~MC_LS_ENABLE;
6213                 if (data != orig)
6214                         WREG32(mc_cg_registers[i], data);
6215         }
6216 }
6217
6218 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6219                                bool enable)
6220 {
6221         int i;
6222         u32 orig, data;
6223
6224         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6225                 orig = data = RREG32(mc_cg_registers[i]);
6226                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6227                         data |= MC_CG_ENABLE;
6228                 else
6229                         data &= ~MC_CG_ENABLE;
6230                 if (data != orig)
6231                         WREG32(mc_cg_registers[i], data);
6232         }
6233 }
6234
/**
 * cik_enable_sdma_mgcg - enable/disable SDMA medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable (honored only if SDMA_MGCG is in cg_flags)
 *
 * Enabling writes a fixed value (0x00000100) to both SDMA engines'
 * clock control registers; disabling instead sets the upper override
 * byte (0xff000000) via read-modify-write.  The asymmetry matches the
 * hw programming model, so the two paths are intentionally different.
 */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
                                 bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
                WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
                WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
        } else {
                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
                data |= 0xff000000;
                if (data != orig)
                        WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

                orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
                data |= 0xff000000;
                if (data != orig)
                        WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
        }
}
6255
6256 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6257                                  bool enable)
6258 {
6259         u32 orig, data;
6260
6261         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6262                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6263                 data |= 0x100;
6264                 if (orig != data)
6265                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6266
6267                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6268                 data |= 0x100;
6269                 if (orig != data)
6270                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6271         } else {
6272                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6273                 data &= ~0x100;
6274                 if (orig != data)
6275                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6276
6277                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6278                 data &= ~0x100;
6279                 if (orig != data)
6280                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6281         }
6282 }
6283
/**
 * cik_enable_uvd_mgcg - enable/disable UVD medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable (honored only if UVD_MGCG is in cg_flags)
 *
 * Programs the UVD CGC memory control bits and the DCM bit of
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the read result is immediately overwritten
                 * with 0xfff; presumably the read is only a posting access -
                 * confirm against the UVD programming docs */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6309
6310 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6311                                bool enable)
6312 {
6313         u32 orig, data;
6314
6315         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6316
6317         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6318                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6319                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6320         else
6321                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6322                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6323
6324         if (orig != data)
6325                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6326 }
6327
6328 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6329                                 bool enable)
6330 {
6331         u32 orig, data;
6332
6333         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6334
6335         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6336                 data &= ~CLOCK_GATING_DIS;
6337         else
6338                 data |= CLOCK_GATING_DIS;
6339
6340         if (orig != data)
6341                 WREG32(HDP_HOST_PATH_CNTL, data);
6342 }
6343
6344 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6345                               bool enable)
6346 {
6347         u32 orig, data;
6348
6349         orig = data = RREG32(HDP_MEM_POWER_LS);
6350
6351         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6352                 data |= HDP_LS_ENABLE;
6353         else
6354                 data &= ~HDP_LS_ENABLE;
6355
6356         if (orig != data)
6357                 WREG32(HDP_MEM_POWER_LS, data);
6358 }
6359
/**
 * cik_update_cg - update clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* values selecting which blocks
 *         to update
 * @enable: enable or disable clock gating for the selected blocks
 *
 * Dispatches to the per-block enable helpers.  For GFX, the MGCG/CGCG
 * enable order (and its reverse on disable) is a required sequence.
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                /* MC gating applies to discrete parts only */
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_VCE) {
                vce_v2_0_enable_mgcg(rdev, enable);
        }
}
6407
/**
 * cik_init_cg - enable clock gating for all supported blocks
 *
 * @rdev: radeon_device pointer
 *
 * GFX is enabled first, then UVD internal CG (when present), then the
 * remaining blocks in one batch.  cik_fini_cg() undoes this in reverse.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6422
/**
 * cik_fini_cg - disable clock gating for all supported blocks
 *
 * @rdev: radeon_device pointer
 *
 * Reverse order of cik_init_cg(): the non-GFX blocks are disabled
 * first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6433
6434 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6435                                           bool enable)
6436 {
6437         u32 data, orig;
6438
6439         orig = data = RREG32(RLC_PG_CNTL);
6440         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6441                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6442         else
6443                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6444         if (orig != data)
6445                 WREG32(RLC_PG_CNTL, data);
6446 }
6447
6448 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6449                                           bool enable)
6450 {
6451         u32 data, orig;
6452
6453         orig = data = RREG32(RLC_PG_CNTL);
6454         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6455                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6456         else
6457                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6458         if (orig != data)
6459                 WREG32(RLC_PG_CNTL, data);
6460 }
6461
6462 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6463 {
6464         u32 data, orig;
6465
6466         orig = data = RREG32(RLC_PG_CNTL);
6467         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6468                 data &= ~DISABLE_CP_PG;
6469         else
6470                 data |= DISABLE_CP_PG;
6471         if (orig != data)
6472                 WREG32(RLC_PG_CNTL, data);
6473 }
6474
6475 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6476 {
6477         u32 data, orig;
6478
6479         orig = data = RREG32(RLC_PG_CNTL);
6480         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6481                 data &= ~DISABLE_GDS_PG;
6482         else
6483                 data |= DISABLE_GDS_PG;
6484         if (orig != data)
6485                 WREG32(RLC_PG_CNTL, data);
6486 }
6487
/* Legacy (big-endian) firmware images keep the CP jump tables at fixed
 * dword offsets with a fixed dword size (see cik_init_cp_pg_table()).
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6491
6492 void cik_init_cp_pg_table(struct radeon_device *rdev)
6493 {
6494         volatile u32 *dst_ptr;
6495         int me, i, max_me = 4;
6496         u32 bo_offset = 0;
6497         u32 table_offset, table_size;
6498
6499         if (rdev->family == CHIP_KAVERI)
6500                 max_me = 5;
6501
6502         if (rdev->rlc.cp_table_ptr == NULL)
6503                 return;
6504
6505         /* write the cp table buffer */
6506         dst_ptr = rdev->rlc.cp_table_ptr;
6507         for (me = 0; me < max_me; me++) {
6508                 if (rdev->new_fw) {
6509                         const __le32 *fw_data;
6510                         const struct gfx_firmware_header_v1_0 *hdr;
6511
6512                         if (me == 0) {
6513                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6514                                 fw_data = (const __le32 *)
6515                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6516                                 table_offset = le32_to_cpu(hdr->jt_offset);
6517                                 table_size = le32_to_cpu(hdr->jt_size);
6518                         } else if (me == 1) {
6519                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6520                                 fw_data = (const __le32 *)
6521                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6522                                 table_offset = le32_to_cpu(hdr->jt_offset);
6523                                 table_size = le32_to_cpu(hdr->jt_size);
6524                         } else if (me == 2) {
6525                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6526                                 fw_data = (const __le32 *)
6527                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6528                                 table_offset = le32_to_cpu(hdr->jt_offset);
6529                                 table_size = le32_to_cpu(hdr->jt_size);
6530                         } else if (me == 3) {
6531                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6532                                 fw_data = (const __le32 *)
6533                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6534                                 table_offset = le32_to_cpu(hdr->jt_offset);
6535                                 table_size = le32_to_cpu(hdr->jt_size);
6536                         } else {
6537                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6538                                 fw_data = (const __le32 *)
6539                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6540                                 table_offset = le32_to_cpu(hdr->jt_offset);
6541                                 table_size = le32_to_cpu(hdr->jt_size);
6542                         }
6543
6544                         for (i = 0; i < table_size; i ++) {
6545                                 dst_ptr[bo_offset + i] =
6546                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6547                         }
6548                         bo_offset += table_size;
6549                 } else {
6550                         const __be32 *fw_data;
6551                         table_size = CP_ME_TABLE_SIZE;
6552
6553                         if (me == 0) {
6554                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6555                                 table_offset = CP_ME_TABLE_OFFSET;
6556                         } else if (me == 1) {
6557                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6558                                 table_offset = CP_ME_TABLE_OFFSET;
6559                         } else if (me == 2) {
6560                                 fw_data = (const __be32 *)rdev->me_fw->data;
6561                                 table_offset = CP_ME_TABLE_OFFSET;
6562                         } else {
6563                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6564                                 table_offset = CP_MEC_TABLE_OFFSET;
6565                         }
6566
6567                         for (i = 0; i < table_size; i ++) {
6568                                 dst_ptr[bo_offset + i] =
6569                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6570                         }
6571                         bo_offset += table_size;
6572                 }
6573         }
6574 }
6575
/**
 * cik_enable_gfx_cgpg - enable/disable GFX power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable (honored only if GFX_PG is in pg_flags)
 *
 * Sets/clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): read result is discarded; presumably a
                 * posting read to force the disable to land - confirm */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6605
/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs for a SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index within the SE
 *
 * Reads the fused and user shader array configs for the given SE/SH
 * (the disable bits live in the upper 16 bits) and returns a bitmap
 * with bit n set if CU n is active, limited to max_cu_per_sh bits.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        mutex_lock(&rdev->grbm_idx_mutex);
        cik_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* combine the hw-fused and user disable bits (upper 16 bits) */
        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* build a mask of max_cu_per_sh low bits */
        for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        /* disable bits inverted -> active bitmap */
        return (~tmp) & mask;
}
6630
6631 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6632 {
6633         u32 i, j, k, active_cu_number = 0;
6634         u32 mask, counter, cu_bitmap;
6635         u32 tmp = 0;
6636
6637         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6638                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6639                         mask = 1;
6640                         cu_bitmap = 0;
6641                         counter = 0;
6642                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6643                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6644                                         if (counter < 2)
6645                                                 cu_bitmap |= mask;
6646                                         counter ++;
6647                                 }
6648                                 mask <<= 1;
6649                         }
6650
6651                         active_cu_number += counter;
6652                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6653                 }
6654         }
6655
6656         WREG32(RLC_PG_AO_CU_MASK, tmp);
6657
6658         tmp = RREG32(RLC_MAX_PG_CU);
6659         tmp &= ~MAX_PU_CU_MASK;
6660         tmp |= MAX_PU_CU(active_cu_number);
6661         WREG32(RLC_MAX_PG_CU, tmp);
6662 }
6663
6664 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6665                                        bool enable)
6666 {
6667         u32 data, orig;
6668
6669         orig = data = RREG32(RLC_PG_CNTL);
6670         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6671                 data |= STATIC_PER_CU_PG_ENABLE;
6672         else
6673                 data &= ~STATIC_PER_CU_PG_ENABLE;
6674         if (orig != data)
6675                 WREG32(RLC_PG_CNTL, data);
6676 }
6677
6678 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6679                                         bool enable)
6680 {
6681         u32 data, orig;
6682
6683         orig = data = RREG32(RLC_PG_CNTL);
6684         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6685                 data |= DYN_PER_CU_PG_ENABLE;
6686         else
6687                 data &= ~DYN_PER_CU_PG_ENABLE;
6688         if (orig != data)
6689                 WREG32(RLC_PG_CNTL, data);
6690 }
6691
6692 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6693 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6694
/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Publishes the clear state descriptor and the save/restore register
 * list into RLC scratch space, points the RLC at the save/restore and
 * CP table buffers, and programs the powergating delay and idle-poll
 * parameters.  Assumes the RLC buffers referenced below (clear state,
 * save/restore, cp table) have already been set up — their gpu
 * addresses are used unconditionally.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* descriptor = hi addr, lo addr, size — written in that order */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state data: zero all three descriptor dwords */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list, if provided */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        /* buffer addresses are programmed in units of 256 bytes (>> 8) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
6743
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse grain, static medium grain, and dynamic medium grain
 * gfx powergating together; each helper checks its own pg_flags bit
 * internally, so unsupported features are left disabled.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6750
6751 u32 cik_get_csb_size(struct radeon_device *rdev)
6752 {
6753         u32 count = 0;
6754         const struct cs_section_def *sect = NULL;
6755         const struct cs_extent_def *ext = NULL;
6756
6757         if (rdev->rlc.cs_data == NULL)
6758                 return 0;
6759
6760         /* begin clear state */
6761         count += 2;
6762         /* context control state */
6763         count += 3;
6764
6765         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6766                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6767                         if (sect->id == SECT_CONTEXT)
6768                                 count += 2 + ext->reg_count;
6769                         else
6770                                 return 0;
6771                 }
6772         }
6773         /* pa_sc_raster_config/pa_sc_raster_config1 */
6774         count += 4;
6775         /* end clear state */
6776         count += 2;
6777         /* clear state */
6778         count += 2;
6779
6780         return count;
6781 }
6782
6783 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6784 {
6785         u32 count = 0, i;
6786         const struct cs_section_def *sect = NULL;
6787         const struct cs_extent_def *ext = NULL;
6788
6789         if (rdev->rlc.cs_data == NULL)
6790                 return;
6791         if (buffer == NULL)
6792                 return;
6793
6794         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6795         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6796
6797         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6798         buffer[count++] = cpu_to_le32(0x80000000);
6799         buffer[count++] = cpu_to_le32(0x80000000);
6800
6801         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6802                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6803                         if (sect->id == SECT_CONTEXT) {
6804                                 buffer[count++] =
6805                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6806                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6807                                 for (i = 0; i < ext->reg_count; i++)
6808                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6809                         } else {
6810                                 return;
6811                         }
6812                 }
6813         }
6814
6815         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6816         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6817         switch (rdev->family) {
6818         case CHIP_BONAIRE:
6819                 buffer[count++] = cpu_to_le32(0x16000012);
6820                 buffer[count++] = cpu_to_le32(0x00000000);
6821                 break;
6822         case CHIP_KAVERI:
6823                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6824                 buffer[count++] = cpu_to_le32(0x00000000);
6825                 break;
6826         case CHIP_KABINI:
6827         case CHIP_MULLINS:
6828                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6829                 buffer[count++] = cpu_to_le32(0x00000000);
6830                 break;
6831         case CHIP_HAWAII:
6832                 buffer[count++] = cpu_to_le32(0x3a00161a);
6833                 buffer[count++] = cpu_to_le32(0x0000002e);
6834                 break;
6835         default:
6836                 buffer[count++] = cpu_to_le32(0x00000000);
6837                 buffer[count++] = cpu_to_le32(0x00000000);
6838                 break;
6839         }
6840
6841         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6842         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6843
6844         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6845         buffer[count++] = cpu_to_le32(0);
6846 }
6847
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any powergating flags are set: enable sclk slowdown on power
 * up/down, set up gfx coarse grain powergating (RLC scratch, CP and
 * GDS powergating) when RADEON_PG_SUPPORT_GFX_PG is available, program
 * the always-on CU mask, and finally turn on the gfx powergating
 * features.  Order matters: the RLC/AO setup precedes the enable.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6862
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Mirror of cik_init_pg(): disable the gfx powergating features first,
 * then turn off CP and GDS powergating if they were enabled.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6873
6874 /*
6875  * Interrupts
6876  * Starting with r6xx, interrupts are handled via a ring buffer.
6877  * Ring buffers are areas of GPU accessible memory that the GPU
6878  * writes interrupt vectors into and the host reads vectors out of.
6879  * There is a rptr (read pointer) that determines where the
6880  * host is currently reading, and a wptr (write pointer)
6881  * which determines where the GPU has written.  When the
6882  * pointers are equal, the ring is idle.  When the GPU
6883  * writes vectors to the ring buffer, it increments the
6884  * wptr.  When there is an interrupt, the host then starts
6885  * fetching commands and processing them until the pointers are
6886  * equal again at which point it updates the rptr.
6887  */
6888
6889 /**
6890  * cik_enable_interrupts - Enable the interrupt ring buffer
6891  *
6892  * @rdev: radeon_device pointer
6893  *
6894  * Enable the interrupt ring buffer (CIK).
6895  */
6896 static void cik_enable_interrupts(struct radeon_device *rdev)
6897 {
6898         u32 ih_cntl = RREG32(IH_CNTL);
6899         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6900
6901         ih_cntl |= ENABLE_INTR;
6902         ih_rb_cntl |= IH_RB_ENABLE;
6903         WREG32(IH_CNTL, ih_cntl);
6904         WREG32(IH_RB_CNTL, ih_rb_cntl);
6905         rdev->ih.enabled = true;
6906 }
6907
6908 /**
6909  * cik_disable_interrupts - Disable the interrupt ring buffer
6910  *
6911  * @rdev: radeon_device pointer
6912  *
6913  * Disable the interrupt ring buffer (CIK).
6914  */
6915 static void cik_disable_interrupts(struct radeon_device *rdev)
6916 {
6917         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6918         u32 ih_cntl = RREG32(IH_CNTL);
6919
6920         ih_rb_cntl &= ~IH_RB_ENABLE;
6921         ih_cntl &= ~ENABLE_INTR;
6922         WREG32(IH_RB_CNTL, ih_rb_cntl);
6923         WREG32(IH_CNTL, ih_cntl);
6924         /* set rptr, wptr to 0 */
6925         WREG32(IH_RB_RPTR, 0);
6926         WREG32(IH_RB_WPTR, 0);
6927         rdev->ih.enabled = false;
6928         rdev->ih.rptr = 0;
6929 }
6930
6931 /**
6932  * cik_disable_interrupt_state - Disable all interrupt sources
6933  *
6934  * @rdev: radeon_device pointer
6935  *
6936  * Clear all interrupt enable bits used by the driver (CIK).
6937  */
6938 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6939 {
6940         u32 tmp;
6941
6942         /* gfx ring */
6943         tmp = RREG32(CP_INT_CNTL_RING0) &
6944                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6945         WREG32(CP_INT_CNTL_RING0, tmp);
6946         /* sdma */
6947         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6948         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6949         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6950         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6951         /* compute queues */
6952         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6953         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6954         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6955         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6956         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6957         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6958         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6959         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6960         /* grbm */
6961         WREG32(GRBM_INT_CNTL, 0);
6962         /* SRBM */
6963         WREG32(SRBM_INT_CNTL, 0);
6964         /* vline/vblank, etc. */
6965         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6966         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6967         if (rdev->num_crtc >= 4) {
6968                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6969                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6970         }
6971         if (rdev->num_crtc >= 6) {
6972                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6973                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6974         }
6975         /* pflip */
6976         if (rdev->num_crtc >= 2) {
6977                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6978                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6979         }
6980         if (rdev->num_crtc >= 4) {
6981                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6982                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6983         }
6984         if (rdev->num_crtc >= 6) {
6985                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6986                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6987         }
6988
6989         /* dac hotplug */
6990         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6991
6992         /* digital hotplug */
6993         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6994         WREG32(DC_HPD1_INT_CONTROL, tmp);
6995         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6996         WREG32(DC_HPD2_INT_CONTROL, tmp);
6997         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6998         WREG32(DC_HPD3_INT_CONTROL, tmp);
6999         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7000         WREG32(DC_HPD4_INT_CONTROL, tmp);
7001         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7002         WREG32(DC_HPD5_INT_CONTROL, tmp);
7003         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7004         WREG32(DC_HPD6_INT_CONTROL, tmp);
7005
7006 }
7007
7008 /**
7009  * cik_irq_init - init and enable the interrupt ring
7010  *
7011  * @rdev: radeon_device pointer
7012  *
7013  * Allocate a ring buffer for the interrupt controller,
7014  * enable the RLC, disable interrupts, enable the IH
7015  * ring buffer and enable it (CIK).
7016  * Called at device load and reume.
7017  * Returns 0 for success, errors for failure.
7018  */
7019 static int cik_irq_init(struct radeon_device *rdev)
7020 {
7021         int ret = 0;
7022         int rb_bufsz;
7023         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7024
7025         /* allocate ring */
7026         ret = r600_ih_ring_alloc(rdev);
7027         if (ret)
7028                 return ret;
7029
7030         /* disable irqs */
7031         cik_disable_interrupts(rdev);
7032
7033         /* init rlc */
7034         ret = cik_rlc_resume(rdev);
7035         if (ret) {
7036                 r600_ih_ring_fini(rdev);
7037                 return ret;
7038         }
7039
7040         /* setup interrupt control */
7041         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7042         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7043         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7044         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7045          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7046          */
7047         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7048         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7049         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7050         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7051
7052         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7053         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7054
7055         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7056                       IH_WPTR_OVERFLOW_CLEAR |
7057                       (rb_bufsz << 1));
7058
7059         if (rdev->wb.enabled)
7060                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7061
7062         /* set the writeback address whether it's enabled or not */
7063         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7064         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7065
7066         WREG32(IH_RB_CNTL, ih_rb_cntl);
7067
7068         /* set rptr, wptr to 0 */
7069         WREG32(IH_RB_RPTR, 0);
7070         WREG32(IH_RB_WPTR, 0);
7071
7072         /* Default settings for IH_CNTL (disabled at first) */
7073         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7074         /* RPTR_REARM only works if msi's are enabled */
7075         if (rdev->msi_enabled)
7076                 ih_cntl |= RPTR_REARM;
7077         WREG32(IH_CNTL, ih_cntl);
7078
7079         /* force the active interrupt state to all disabled */
7080         cik_disable_interrupt_state(rdev);
7081
7082         pci_set_master(rdev->pdev);
7083
7084         /* enable irqs */
7085         cik_enable_interrupts(rdev);
7086
7087         return ret;
7088 }
7089
7090 /**
7091  * cik_irq_set - enable/disable interrupt sources
7092  *
7093  * @rdev: radeon_device pointer
7094  *
7095  * Enable interrupt sources on the GPU (vblanks, hpd,
7096  * etc.) (CIK).
7097  * Returns 0 for success, errors for failure.
7098  */
7099 int cik_irq_set(struct radeon_device *rdev)
7100 {
7101         u32 cp_int_cntl;
7102         u32 cp_m1p0;
7103         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7104         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7105         u32 grbm_int_cntl = 0;
7106         u32 dma_cntl, dma_cntl1;
7107
7108         if (!rdev->irq.installed) {
7109                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7110                 return -EINVAL;
7111         }
7112         /* don't enable anything if the ih is disabled */
7113         if (!rdev->ih.enabled) {
7114                 cik_disable_interrupts(rdev);
7115                 /* force the active interrupt state to all disabled */
7116                 cik_disable_interrupt_state(rdev);
7117                 return 0;
7118         }
7119
7120         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7121                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7122         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7123
7124         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7125         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7130
7131         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7132         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7133
7134         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7135
7136         /* enable CP interrupts on all rings */
7137         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7138                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7139                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7140         }
7141         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7142                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7143                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7144                 if (ring->me == 1) {
7145                         switch (ring->pipe) {
7146                         case 0:
7147                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7148                                 break;
7149                         default:
7150                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7151                                 break;
7152                         }
7153                 } else {
7154                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7155                 }
7156         }
7157         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7158                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7159                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7160                 if (ring->me == 1) {
7161                         switch (ring->pipe) {
7162                         case 0:
7163                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7164                                 break;
7165                         default:
7166                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7167                                 break;
7168                         }
7169                 } else {
7170                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7171                 }
7172         }
7173
7174         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7175                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7176                 dma_cntl |= TRAP_ENABLE;
7177         }
7178
7179         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7180                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7181                 dma_cntl1 |= TRAP_ENABLE;
7182         }
7183
7184         if (rdev->irq.crtc_vblank_int[0] ||
7185             atomic_read(&rdev->irq.pflip[0])) {
7186                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7187                 crtc1 |= VBLANK_INTERRUPT_MASK;
7188         }
7189         if (rdev->irq.crtc_vblank_int[1] ||
7190             atomic_read(&rdev->irq.pflip[1])) {
7191                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7192                 crtc2 |= VBLANK_INTERRUPT_MASK;
7193         }
7194         if (rdev->irq.crtc_vblank_int[2] ||
7195             atomic_read(&rdev->irq.pflip[2])) {
7196                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7197                 crtc3 |= VBLANK_INTERRUPT_MASK;
7198         }
7199         if (rdev->irq.crtc_vblank_int[3] ||
7200             atomic_read(&rdev->irq.pflip[3])) {
7201                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7202                 crtc4 |= VBLANK_INTERRUPT_MASK;
7203         }
7204         if (rdev->irq.crtc_vblank_int[4] ||
7205             atomic_read(&rdev->irq.pflip[4])) {
7206                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7207                 crtc5 |= VBLANK_INTERRUPT_MASK;
7208         }
7209         if (rdev->irq.crtc_vblank_int[5] ||
7210             atomic_read(&rdev->irq.pflip[5])) {
7211                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7212                 crtc6 |= VBLANK_INTERRUPT_MASK;
7213         }
7214         if (rdev->irq.hpd[0]) {
7215                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7216                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7217         }
7218         if (rdev->irq.hpd[1]) {
7219                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7220                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7221         }
7222         if (rdev->irq.hpd[2]) {
7223                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7224                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7225         }
7226         if (rdev->irq.hpd[3]) {
7227                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7228                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7229         }
7230         if (rdev->irq.hpd[4]) {
7231                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7232                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7233         }
7234         if (rdev->irq.hpd[5]) {
7235                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7236                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7237         }
7238
7239         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7240
7241         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7242         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7243
7244         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7245
7246         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247
7248         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250         if (rdev->num_crtc >= 4) {
7251                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253         }
7254         if (rdev->num_crtc >= 6) {
7255                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257         }
7258
7259         if (rdev->num_crtc >= 2) {
7260                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261                        GRPH_PFLIP_INT_MASK);
7262                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263                        GRPH_PFLIP_INT_MASK);
7264         }
7265         if (rdev->num_crtc >= 4) {
7266                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267                        GRPH_PFLIP_INT_MASK);
7268                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269                        GRPH_PFLIP_INT_MASK);
7270         }
7271         if (rdev->num_crtc >= 6) {
7272                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273                        GRPH_PFLIP_INT_MASK);
7274                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275                        GRPH_PFLIP_INT_MASK);
7276         }
7277
7278         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284
7285         /* posting read */
7286         RREG32(SRBM_STATUS);
7287
7288         return 0;
7289 }
7290
7291 /**
7292  * cik_irq_ack - ack interrupt sources
7293  *
7294  * @rdev: radeon_device pointer
7295  *
7296  * Ack interrupt sources on the GPU (vblanks, hpd,
7297  * etc.) (CIK).  Certain interrupts sources are sw
7298  * generated and do not require an explicit ack.
7299  */
7300 static inline void cik_irq_ack(struct radeon_device *rdev)
7301 {
7302         u32 tmp;
7303
7304         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7305         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7306         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7307         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7308         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7309         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7310         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7311
7312         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7313                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7314         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7315                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7316         if (rdev->num_crtc >= 4) {
7317                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7318                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7319                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7320                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7321         }
7322         if (rdev->num_crtc >= 6) {
7323                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7324                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7325                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7326                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7327         }
7328
7329         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7330                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7331                        GRPH_PFLIP_INT_CLEAR);
7332         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7333                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7334                        GRPH_PFLIP_INT_CLEAR);
7335         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7336                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7337         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7338                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7339         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7340                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7341         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7342                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7343
7344         if (rdev->num_crtc >= 4) {
7345                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7346                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7347                                GRPH_PFLIP_INT_CLEAR);
7348                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7349                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7350                                GRPH_PFLIP_INT_CLEAR);
7351                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7352                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7353                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7354                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7355                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7356                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7357                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7358                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7359         }
7360
7361         if (rdev->num_crtc >= 6) {
7362                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7363                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7364                                GRPH_PFLIP_INT_CLEAR);
7365                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7366                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7367                                GRPH_PFLIP_INT_CLEAR);
7368                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7369                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7371                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7372                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7373                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7374                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7375                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7376         }
7377
7378         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7379                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7380                 tmp |= DC_HPDx_INT_ACK;
7381                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7382         }
7383         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7384                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7385                 tmp |= DC_HPDx_INT_ACK;
7386                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7387         }
7388         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7389                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7390                 tmp |= DC_HPDx_INT_ACK;
7391                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7392         }
7393         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7395                 tmp |= DC_HPDx_INT_ACK;
7396                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7400                 tmp |= DC_HPDx_INT_ACK;
7401                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7405                 tmp |= DC_HPDx_INT_ACK;
7406                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7407         }
7408         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7410                 tmp |= DC_HPDx_RX_INT_ACK;
7411                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7412         }
7413         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7414                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7415                 tmp |= DC_HPDx_RX_INT_ACK;
7416                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7417         }
7418         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7419                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7420                 tmp |= DC_HPDx_RX_INT_ACK;
7421                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7422         }
7423         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7424                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7425                 tmp |= DC_HPDx_RX_INT_ACK;
7426                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7427         }
7428         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7429                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7430                 tmp |= DC_HPDx_RX_INT_ACK;
7431                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7432         }
7433         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7434                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7435                 tmp |= DC_HPDx_RX_INT_ACK;
7436                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7437         }
7438 }
7439
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Masks interrupt generation, then acks anything already latched so
 * no stale status bits remain asserted.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
        cik_disable_interrupts(rdev);
        /* Wait and acknowledge irq: give any in-flight interrupt time to
         * land before acking, so nothing is left pending after disable.
         */
        mdelay(1);
        cik_irq_ack(rdev);
        cik_disable_interrupt_state(rdev);
}
7455
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
        cik_irq_disable(rdev);
        cik_rlc_stop(rdev);
}
7469
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 * Must suspend (disable + stop RLC) before freeing the ring so the
 * hw cannot write into freed memory.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
        cik_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
7484
7485 /**
7486  * cik_get_ih_wptr - get the IH ring buffer wptr
7487  *
7488  * @rdev: radeon_device pointer
7489  *
7490  * Get the IH ring buffer wptr from either the register
7491  * or the writeback memory buffer (CIK).  Also check for
7492  * ring buffer overflow and deal with it.
7493  * Used by cik_irq_process().
7494  * Returns the value of the wptr.
7495  */
7496 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497 {
7498         u32 wptr, tmp;
7499
7500         if (rdev->wb.enabled)
7501                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7502         else
7503                 wptr = RREG32(IH_RB_WPTR);
7504
7505         if (wptr & RB_OVERFLOW) {
7506                 wptr &= ~RB_OVERFLOW;
7507                 /* When a ring buffer overflow happen start parsing interrupt
7508                  * from the last not overwritten vector (wptr + 16). Hopefully
7509                  * this should allow us to catchup.
7510                  */
7511                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7512                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7513                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7514                 tmp = RREG32(IH_RB_CNTL);
7515                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7516                 WREG32(IH_RB_CNTL, tmp);
7517         }
7518         return (wptr & rdev->ih.ptr_mask);
7519 }
7520
7521 /*        CIK IV Ring
7522  * Each IV ring entry is 128 bits:
7523  * [7:0]    - interrupt source id
7524  * [31:8]   - reserved
7525  * [59:32]  - interrupt source data
7526  * [63:60]  - reserved
7527  * [71:64]  - RINGID
7528  *            CP:
7529  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533  *            PIPE_ID - ME0 0=3D
7534  *                    - ME1&2 compute dispatcher (4 pipes each)
7535  *            SDMA:
7536  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539  * [79:72]  - VMID
7540  * [95:80]  - PASID
7541  * [127:96] - reserved
7542  */
7543 /**
7544  * cik_irq_process - interrupt handler
7545  *
7546  * @rdev: radeon_device pointer
7547  *
 * Interrupt handler (CIK).  Walk the IH ring,
7549  * ack interrupts and schedule work to handle
7550  * interrupt events.
7551  * Returns irq process return code.
7552  */
7553 int cik_irq_process(struct radeon_device *rdev)
7554 {
7555         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557         u32 wptr;
7558         u32 rptr;
7559         u32 src_id, src_data, ring_id;
7560         u8 me_id, pipe_id, queue_id;
7561         u32 ring_index;
7562         bool queue_hotplug = false;
7563         bool queue_dp = false;
7564         bool queue_reset = false;
7565         u32 addr, status, mc_client;
7566         bool queue_thermal = false;
7567
7568         if (!rdev->ih.enabled || rdev->shutdown)
7569                 return IRQ_NONE;
7570
7571         wptr = cik_get_ih_wptr(rdev);
7572
7573 restart_ih:
7574         /* is somebody else already processing irqs? */
7575         if (atomic_xchg(&rdev->ih.lock, 1))
7576                 return IRQ_NONE;
7577
7578         rptr = rdev->ih.rptr;
7579         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580
7581         /* Order reading of wptr vs. reading of IH ring data */
7582         rmb();
7583
7584         /* display interrupts */
7585         cik_irq_ack(rdev);
7586
7587         while (rptr != wptr) {
7588                 /* wptr/rptr are in bytes! */
7589                 ring_index = rptr / 4;
7590
7591                 radeon_kfd_interrupt(rdev,
7592                                 (const void *) &rdev->ih.ring[ring_index]);
7593
7594                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7595                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7596                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7597
7598                 switch (src_id) {
7599                 case 1: /* D1 vblank/vline */
7600                         switch (src_data) {
7601                         case 0: /* D1 vblank */
7602                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7603                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7604
7605                                 if (rdev->irq.crtc_vblank_int[0]) {
7606                                         drm_handle_vblank(rdev->ddev, 0);
7607                                         rdev->pm.vblank_sync = true;
7608                                         wake_up(&rdev->irq.vblank_queue);
7609                                 }
7610                                 if (atomic_read(&rdev->irq.pflip[0]))
7611                                         radeon_crtc_handle_vblank(rdev, 0);
7612                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7613                                 DRM_DEBUG("IH: D1 vblank\n");
7614
7615                                 break;
7616                         case 1: /* D1 vline */
7617                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7618                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619
7620                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7621                                 DRM_DEBUG("IH: D1 vline\n");
7622
7623                                 break;
7624                         default:
7625                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7626                                 break;
7627                         }
7628                         break;
7629                 case 2: /* D2 vblank/vline */
7630                         switch (src_data) {
7631                         case 0: /* D2 vblank */
7632                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7633                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634
7635                                 if (rdev->irq.crtc_vblank_int[1]) {
7636                                         drm_handle_vblank(rdev->ddev, 1);
7637                                         rdev->pm.vblank_sync = true;
7638                                         wake_up(&rdev->irq.vblank_queue);
7639                                 }
7640                                 if (atomic_read(&rdev->irq.pflip[1]))
7641                                         radeon_crtc_handle_vblank(rdev, 1);
7642                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7643                                 DRM_DEBUG("IH: D2 vblank\n");
7644
7645                                 break;
7646                         case 1: /* D2 vline */
7647                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7648                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649
7650                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7651                                 DRM_DEBUG("IH: D2 vline\n");
7652
7653                                 break;
7654                         default:
7655                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7656                                 break;
7657                         }
7658                         break;
7659                 case 3: /* D3 vblank/vline */
7660                         switch (src_data) {
7661                         case 0: /* D3 vblank */
7662                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7663                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664
7665                                 if (rdev->irq.crtc_vblank_int[2]) {
7666                                         drm_handle_vblank(rdev->ddev, 2);
7667                                         rdev->pm.vblank_sync = true;
7668                                         wake_up(&rdev->irq.vblank_queue);
7669                                 }
7670                                 if (atomic_read(&rdev->irq.pflip[2]))
7671                                         radeon_crtc_handle_vblank(rdev, 2);
7672                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7673                                 DRM_DEBUG("IH: D3 vblank\n");
7674
7675                                 break;
7676                         case 1: /* D3 vline */
7677                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7678                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679
7680                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7681                                 DRM_DEBUG("IH: D3 vline\n");
7682
7683                                 break;
7684                         default:
7685                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7686                                 break;
7687                         }
7688                         break;
7689                 case 4: /* D4 vblank/vline */
7690                         switch (src_data) {
7691                         case 0: /* D4 vblank */
7692                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7693                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694
7695                                 if (rdev->irq.crtc_vblank_int[3]) {
7696                                         drm_handle_vblank(rdev->ddev, 3);
7697                                         rdev->pm.vblank_sync = true;
7698                                         wake_up(&rdev->irq.vblank_queue);
7699                                 }
7700                                 if (atomic_read(&rdev->irq.pflip[3]))
7701                                         radeon_crtc_handle_vblank(rdev, 3);
7702                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7703                                 DRM_DEBUG("IH: D4 vblank\n");
7704
7705                                 break;
7706                         case 1: /* D4 vline */
7707                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7708                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709
7710                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7711                                 DRM_DEBUG("IH: D4 vline\n");
7712
7713                                 break;
7714                         default:
7715                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7716                                 break;
7717                         }
7718                         break;
7719                 case 5: /* D5 vblank/vline */
7720                         switch (src_data) {
7721                         case 0: /* D5 vblank */
7722                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7723                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724
7725                                 if (rdev->irq.crtc_vblank_int[4]) {
7726                                         drm_handle_vblank(rdev->ddev, 4);
7727                                         rdev->pm.vblank_sync = true;
7728                                         wake_up(&rdev->irq.vblank_queue);
7729                                 }
7730                                 if (atomic_read(&rdev->irq.pflip[4]))
7731                                         radeon_crtc_handle_vblank(rdev, 4);
7732                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7733                                 DRM_DEBUG("IH: D5 vblank\n");
7734
7735                                 break;
7736                         case 1: /* D5 vline */
7737                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7738                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739
7740                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7741                                 DRM_DEBUG("IH: D5 vline\n");
7742
7743                                 break;
7744                         default:
7745                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7746                                 break;
7747                         }
7748                         break;
7749                 case 6: /* D6 vblank/vline */
7750                         switch (src_data) {
7751                         case 0: /* D6 vblank */
7752                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7753                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754
7755                                 if (rdev->irq.crtc_vblank_int[5]) {
7756                                         drm_handle_vblank(rdev->ddev, 5);
7757                                         rdev->pm.vblank_sync = true;
7758                                         wake_up(&rdev->irq.vblank_queue);
7759                                 }
7760                                 if (atomic_read(&rdev->irq.pflip[5]))
7761                                         radeon_crtc_handle_vblank(rdev, 5);
7762                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7763                                 DRM_DEBUG("IH: D6 vblank\n");
7764
7765                                 break;
7766                         case 1: /* D6 vline */
7767                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7768                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769
7770                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7771                                 DRM_DEBUG("IH: D6 vline\n");
7772
7773                                 break;
7774                         default:
7775                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7776                                 break;
7777                         }
7778                         break;
7779                 case 8: /* D1 page flip */
7780                 case 10: /* D2 page flip */
7781                 case 12: /* D3 page flip */
7782                 case 14: /* D4 page flip */
7783                 case 16: /* D5 page flip */
7784                 case 18: /* D6 page flip */
7785                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7786                         if (radeon_use_pflipirq > 0)
7787                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7788                         break;
7789                 case 42: /* HPD hotplug */
7790                         switch (src_data) {
7791                         case 0:
7792                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7793                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7796                                 queue_hotplug = true;
7797                                 DRM_DEBUG("IH: HPD1\n");
7798
7799                                 break;
7800                         case 1:
7801                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7802                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7805                                 queue_hotplug = true;
7806                                 DRM_DEBUG("IH: HPD2\n");
7807
7808                                 break;
7809                         case 2:
7810                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7811                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7814                                 queue_hotplug = true;
7815                                 DRM_DEBUG("IH: HPD3\n");
7816
7817                                 break;
7818                         case 3:
7819                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7820                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7823                                 queue_hotplug = true;
7824                                 DRM_DEBUG("IH: HPD4\n");
7825
7826                                 break;
7827                         case 4:
7828                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7829                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7832                                 queue_hotplug = true;
7833                                 DRM_DEBUG("IH: HPD5\n");
7834
7835                                 break;
7836                         case 5:
7837                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7838                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7841                                 queue_hotplug = true;
7842                                 DRM_DEBUG("IH: HPD6\n");
7843
7844                                 break;
7845                         case 6:
7846                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7847                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7850                                 queue_dp = true;
7851                                 DRM_DEBUG("IH: HPD_RX 1\n");
7852
7853                                 break;
7854                         case 7:
7855                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7856                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7859                                 queue_dp = true;
7860                                 DRM_DEBUG("IH: HPD_RX 2\n");
7861
7862                                 break;
7863                         case 8:
7864                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7865                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7868                                 queue_dp = true;
7869                                 DRM_DEBUG("IH: HPD_RX 3\n");
7870
7871                                 break;
7872                         case 9:
7873                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7874                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7877                                 queue_dp = true;
7878                                 DRM_DEBUG("IH: HPD_RX 4\n");
7879
7880                                 break;
7881                         case 10:
7882                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7883                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884
7885                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7886                                 queue_dp = true;
7887                                 DRM_DEBUG("IH: HPD_RX 5\n");
7888
7889                                 break;
7890                         case 11:
7891                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7892                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893
7894                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7895                                 queue_dp = true;
7896                                 DRM_DEBUG("IH: HPD_RX 6\n");
7897
7898                                 break;
7899                         default:
7900                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7901                                 break;
7902                         }
7903                         break;
7904                 case 96:
7905                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7906                         WREG32(SRBM_INT_ACK, 0x1);
7907                         break;
7908                 case 124: /* UVD */
7909                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7910                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7911                         break;
7912                 case 146:
7913                 case 147:
7914                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7915                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7916                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7917                         /* reset addr and status */
7918                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7919                         if (addr == 0x0 && status == 0x0)
7920                                 break;
7921                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7922                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7923                                 addr);
7924                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7925                                 status);
7926                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7927                         break;
7928                 case 167: /* VCE */
7929                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7930                         switch (src_data) {
7931                         case 0:
7932                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7933                                 break;
7934                         case 1:
7935                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7936                                 break;
7937                         default:
7938                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7939                                 break;
7940                         }
7941                         break;
7942                 case 176: /* GFX RB CP_INT */
7943                 case 177: /* GFX IB CP_INT */
7944                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7945                         break;
7946                 case 181: /* CP EOP event */
7947                         DRM_DEBUG("IH: CP EOP\n");
7948                         /* XXX check the bitfield order! */
7949                         me_id = (ring_id & 0x60) >> 5;
7950                         pipe_id = (ring_id & 0x18) >> 3;
7951                         queue_id = (ring_id & 0x7) >> 0;
7952                         switch (me_id) {
7953                         case 0:
7954                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7955                                 break;
7956                         case 1:
7957                         case 2:
7958                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7959                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7960                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7961                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7962                                 break;
7963                         }
7964                         break;
7965                 case 184: /* CP Privileged reg access */
7966                         DRM_ERROR("Illegal register access in command stream\n");
7967                         /* XXX check the bitfield order! */
7968                         me_id = (ring_id & 0x60) >> 5;
7969                         pipe_id = (ring_id & 0x18) >> 3;
7970                         queue_id = (ring_id & 0x7) >> 0;
7971                         switch (me_id) {
7972                         case 0:
7973                                 /* This results in a full GPU reset, but all we need to do is soft
7974                                  * reset the CP for gfx
7975                                  */
7976                                 queue_reset = true;
7977                                 break;
7978                         case 1:
7979                                 /* XXX compute */
7980                                 queue_reset = true;
7981                                 break;
7982                         case 2:
7983                                 /* XXX compute */
7984                                 queue_reset = true;
7985                                 break;
7986                         }
7987                         break;
7988                 case 185: /* CP Privileged inst */
7989                         DRM_ERROR("Illegal instruction in command stream\n");
7990                         /* XXX check the bitfield order! */
7991                         me_id = (ring_id & 0x60) >> 5;
7992                         pipe_id = (ring_id & 0x18) >> 3;
7993                         queue_id = (ring_id & 0x7) >> 0;
7994                         switch (me_id) {
7995                         case 0:
7996                                 /* This results in a full GPU reset, but all we need to do is soft
7997                                  * reset the CP for gfx
7998                                  */
7999                                 queue_reset = true;
8000                                 break;
8001                         case 1:
8002                                 /* XXX compute */
8003                                 queue_reset = true;
8004                                 break;
8005                         case 2:
8006                                 /* XXX compute */
8007                                 queue_reset = true;
8008                                 break;
8009                         }
8010                         break;
8011                 case 224: /* SDMA trap event */
8012                         /* XXX check the bitfield order! */
8013                         me_id = (ring_id & 0x3) >> 0;
8014                         queue_id = (ring_id & 0xc) >> 2;
8015                         DRM_DEBUG("IH: SDMA trap\n");
8016                         switch (me_id) {
8017                         case 0:
8018                                 switch (queue_id) {
8019                                 case 0:
8020                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8021                                         break;
8022                                 case 1:
8023                                         /* XXX compute */
8024                                         break;
8025                                 case 2:
8026                                         /* XXX compute */
8027                                         break;
8028                                 }
8029                                 break;
8030                         case 1:
8031                                 switch (queue_id) {
8032                                 case 0:
8033                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8034                                         break;
8035                                 case 1:
8036                                         /* XXX compute */
8037                                         break;
8038                                 case 2:
8039                                         /* XXX compute */
8040                                         break;
8041                                 }
8042                                 break;
8043                         }
8044                         break;
8045                 case 230: /* thermal low to high */
8046                         DRM_DEBUG("IH: thermal low to high\n");
8047                         rdev->pm.dpm.thermal.high_to_low = false;
8048                         queue_thermal = true;
8049                         break;
8050                 case 231: /* thermal high to low */
8051                         DRM_DEBUG("IH: thermal high to low\n");
8052                         rdev->pm.dpm.thermal.high_to_low = true;
8053                         queue_thermal = true;
8054                         break;
8055                 case 233: /* GUI IDLE */
8056                         DRM_DEBUG("IH: GUI idle\n");
8057                         break;
8058                 case 241: /* SDMA Privileged inst */
8059                 case 247: /* SDMA Privileged inst */
8060                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8061                         /* XXX check the bitfield order! */
8062                         me_id = (ring_id & 0x3) >> 0;
8063                         queue_id = (ring_id & 0xc) >> 2;
8064                         switch (me_id) {
8065                         case 0:
8066                                 switch (queue_id) {
8067                                 case 0:
8068                                         queue_reset = true;
8069                                         break;
8070                                 case 1:
8071                                         /* XXX compute */
8072                                         queue_reset = true;
8073                                         break;
8074                                 case 2:
8075                                         /* XXX compute */
8076                                         queue_reset = true;
8077                                         break;
8078                                 }
8079                                 break;
8080                         case 1:
8081                                 switch (queue_id) {
8082                                 case 0:
8083                                         queue_reset = true;
8084                                         break;
8085                                 case 1:
8086                                         /* XXX compute */
8087                                         queue_reset = true;
8088                                         break;
8089                                 case 2:
8090                                         /* XXX compute */
8091                                         queue_reset = true;
8092                                         break;
8093                                 }
8094                                 break;
8095                         }
8096                         break;
8097                 default:
8098                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8099                         break;
8100                 }
8101
8102                 /* wptr/rptr are in bytes! */
8103                 rptr += 16;
8104                 rptr &= rdev->ih.ptr_mask;
8105                 WREG32(IH_RB_RPTR, rptr);
8106         }
8107         if (queue_dp)
8108                 schedule_work(&rdev->dp_work);
8109         if (queue_hotplug)
8110                 schedule_delayed_work(&rdev->hotplug_work, 0);
8111         if (queue_reset) {
8112                 rdev->needs_reset = true;
8113                 wake_up_all(&rdev->fence_queue);
8114         }
8115         if (queue_thermal)
8116                 schedule_work(&rdev->pm.dpm.thermal.work);
8117         rdev->ih.rptr = rptr;
8118         atomic_set(&rdev->ih.lock, 0);
8119
8120         /* make sure wptr hasn't changed while processing */
8121         wptr = cik_get_ih_wptr(rdev);
8122         if (wptr != rptr)
8123                 goto restart_ih;
8124
8125         return IRQ_HANDLED;
8126 }
8127
8128 /*
8129  * startup/shutdown callbacks
8130  */
8131 static void cik_uvd_init(struct radeon_device *rdev)
8132 {
8133         int r;
8134
8135         if (!rdev->has_uvd)
8136                 return;
8137
8138         r = radeon_uvd_init(rdev);
8139         if (r) {
8140                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8141                 /*
8142                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8143                  * to early fails cik_uvd_start() and thus nothing happens
8144                  * there. So it is pointless to try to go through that code
8145                  * hence why we disable uvd here.
8146                  */
8147                 rdev->has_uvd = 0;
8148                 return;
8149         }
8150         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8151         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8152 }
8153
8154 static void cik_uvd_start(struct radeon_device *rdev)
8155 {
8156         int r;
8157
8158         if (!rdev->has_uvd)
8159                 return;
8160
8161         r = radeon_uvd_resume(rdev);
8162         if (r) {
8163                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8164                 goto error;
8165         }
8166         r = uvd_v4_2_resume(rdev);
8167         if (r) {
8168                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8169                 goto error;
8170         }
8171         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8172         if (r) {
8173                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8174                 goto error;
8175         }
8176         return;
8177
8178 error:
8179         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8180 }
8181
8182 static void cik_uvd_resume(struct radeon_device *rdev)
8183 {
8184         struct radeon_ring *ring;
8185         int r;
8186
8187         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8188                 return;
8189
8190         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8191         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8192         if (r) {
8193                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8194                 return;
8195         }
8196         r = uvd_v1_0_init(rdev);
8197         if (r) {
8198                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8199                 return;
8200         }
8201 }
8202
8203 static void cik_vce_init(struct radeon_device *rdev)
8204 {
8205         int r;
8206
8207         if (!rdev->has_vce)
8208                 return;
8209
8210         r = radeon_vce_init(rdev);
8211         if (r) {
8212                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8213                 /*
8214                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8215                  * to early fails cik_vce_start() and thus nothing happens
8216                  * there. So it is pointless to try to go through that code
8217                  * hence why we disable vce here.
8218                  */
8219                 rdev->has_vce = 0;
8220                 return;
8221         }
8222         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8223         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8224         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8225         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8226 }
8227
8228 static void cik_vce_start(struct radeon_device *rdev)
8229 {
8230         int r;
8231
8232         if (!rdev->has_vce)
8233                 return;
8234
8235         r = radeon_vce_resume(rdev);
8236         if (r) {
8237                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8238                 goto error;
8239         }
8240         r = vce_v2_0_resume(rdev);
8241         if (r) {
8242                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8243                 goto error;
8244         }
8245         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8246         if (r) {
8247                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8248                 goto error;
8249         }
8250         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8251         if (r) {
8252                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8253                 goto error;
8254         }
8255         return;
8256
8257 error:
8258         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8259         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8260 }
8261
8262 static void cik_vce_resume(struct radeon_device *rdev)
8263 {
8264         struct radeon_ring *ring;
8265         int r;
8266
8267         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8268                 return;
8269
8270         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8271         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8272         if (r) {
8273                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8274                 return;
8275         }
8276         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8277         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8278         if (r) {
8279                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8280                 return;
8281         }
8282         r = vce_v1_0_init(rdev);
8283         if (r) {
8284                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8285                 return;
8286         }
8287 }
8288
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 *
 * NOTE(review): the sequence below is order-dependent (scratch before MC,
 * MC before GART, fence drivers before ring init, rings before CP/SDMA
 * resume) — do not reorder without checking the hw programming model.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware here unless dpm already did */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs use a family-specific RLC save/restore register list */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring initialized below */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* ring padding packet: Hawaii with old firmware (new_fw unset) only
	 * takes a type-2 packet; everything else uses a type-3 NOP */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	/* bring up the CP and SDMA engines now that the rings exist */
	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8494
8495 /**
8496  * cik_resume - resume the asic to a functional state
8497  *
8498  * @rdev: radeon_device pointer
8499  *
8500  * Programs the asic to a functional state (CIK).
8501  * Called at resume.
8502  * Returns 0 for success, error for failure.
8503  */
8504 int cik_resume(struct radeon_device *rdev)
8505 {
8506         int r;
8507
8508         /* post card */
8509         atom_asic_init(rdev->mode_info.atom_context);
8510
8511         /* init golden registers */
8512         cik_init_golden_registers(rdev);
8513
8514         if (rdev->pm.pm_method == PM_METHOD_DPM)
8515                 radeon_pm_resume(rdev);
8516
8517         rdev->accel_working = true;
8518         r = cik_startup(rdev);
8519         if (r) {
8520                 DRM_ERROR("cik startup failed on resume\n");
8521                 rdev->accel_working = false;
8522                 return r;
8523         }
8524
8525         return r;
8526
8527 }
8528
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 *
 * NOTE(review): roughly the reverse of cik_startup() — hw users first
 * (kfd, pm, audio, vm), then the engines, then irq/writeback/GART.
 * The call order matters; do not reorder casually.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and both SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8559
8560 /* Plan is to move initialization in that function and use
8561  * helper function so that radeon_device_init pretty much
8562  * do nothing more than calling asic specific function. This
8563  * should also allow to remove a bunch of callback function
8564  * like vram_info.
8565  */
8566 /**
8567  * cik_init - asic specific driver and hw init
8568  *
8569  * @rdev: radeon_device pointer
8570  *
8571  * Setup asic specific driver variables and program the hw
8572  * to a functional state (CIK).
8573  * Called at driver startup.
8574  * Returns 0 for success, errors for failure.
8575  */
8576 int cik_init(struct radeon_device *rdev)
8577 {
8578         struct radeon_ring *ring;
8579         int r;
8580
8581         /* Read BIOS */
8582         if (!radeon_get_bios(rdev)) {
8583                 if (ASIC_IS_AVIVO(rdev))
8584                         return -EINVAL;
8585         }
8586         /* Must be an ATOMBIOS */
8587         if (!rdev->is_atom_bios) {
8588                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8589                 return -EINVAL;
8590         }
8591         r = radeon_atombios_init(rdev);
8592         if (r)
8593                 return r;
8594
8595         /* Post card if necessary */
8596         if (!radeon_card_posted(rdev)) {
8597                 if (!rdev->bios) {
8598                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8599                         return -EINVAL;
8600                 }
8601                 DRM_INFO("GPU not posted. posting now...\n");
8602                 atom_asic_init(rdev->mode_info.atom_context);
8603         }
8604         /* init golden registers */
8605         cik_init_golden_registers(rdev);
8606         /* Initialize scratch registers */
8607         cik_scratch_init(rdev);
8608         /* Initialize surface registers */
8609         radeon_surface_init(rdev);
8610         /* Initialize clocks */
8611         radeon_get_clock_info(rdev->ddev);
8612
8613         /* Fence driver */
8614         r = radeon_fence_driver_init(rdev);
8615         if (r)
8616                 return r;
8617
8618         /* initialize memory controller */
8619         r = cik_mc_init(rdev);
8620         if (r)
8621                 return r;
8622         /* Memory manager */
8623         r = radeon_bo_init(rdev);
8624         if (r)
8625                 return r;
8626
8627         if (rdev->flags & RADEON_IS_IGP) {
8628                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8629                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8630                         r = cik_init_microcode(rdev);
8631                         if (r) {
8632                                 DRM_ERROR("Failed to load firmware!\n");
8633                                 return r;
8634                         }
8635                 }
8636         } else {
8637                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8638                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8639                     !rdev->mc_fw) {
8640                         r = cik_init_microcode(rdev);
8641                         if (r) {
8642                                 DRM_ERROR("Failed to load firmware!\n");
8643                                 return r;
8644                         }
8645                 }
8646         }
8647
8648         /* Initialize power management */
8649         radeon_pm_init(rdev);
8650
8651         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8652         ring->ring_obj = NULL;
8653         r600_ring_init(rdev, ring, 1024 * 1024);
8654
8655         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8656         ring->ring_obj = NULL;
8657         r600_ring_init(rdev, ring, 1024 * 1024);
8658         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8659         if (r)
8660                 return r;
8661
8662         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8663         ring->ring_obj = NULL;
8664         r600_ring_init(rdev, ring, 1024 * 1024);
8665         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8666         if (r)
8667                 return r;
8668
8669         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8670         ring->ring_obj = NULL;
8671         r600_ring_init(rdev, ring, 256 * 1024);
8672
8673         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8674         ring->ring_obj = NULL;
8675         r600_ring_init(rdev, ring, 256 * 1024);
8676
8677         cik_uvd_init(rdev);
8678         cik_vce_init(rdev);
8679
8680         rdev->ih.ring_obj = NULL;
8681         r600_ih_ring_init(rdev, 64 * 1024);
8682
8683         r = r600_pcie_gart_init(rdev);
8684         if (r)
8685                 return r;
8686
8687         rdev->accel_working = true;
8688         r = cik_startup(rdev);
8689         if (r) {
8690                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8691                 cik_cp_fini(rdev);
8692                 cik_sdma_fini(rdev);
8693                 cik_irq_fini(rdev);
8694                 sumo_rlc_fini(rdev);
8695                 cik_mec_fini(rdev);
8696                 radeon_wb_fini(rdev);
8697                 radeon_ib_pool_fini(rdev);
8698                 radeon_vm_manager_fini(rdev);
8699                 radeon_irq_kms_fini(rdev);
8700                 cik_pcie_gart_fini(rdev);
8701                 rdev->accel_working = false;
8702         }
8703
8704         /* Don't start up if the MC ucode is missing.
8705          * The default clocks and voltages before the MC ucode
8706          * is loaded are not suffient for advanced operations.
8707          */
8708         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8709                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8710                 return -EINVAL;
8711         }
8712
8713         return 0;
8714 }
8715
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 *
 * NOTE(review): mirrors the error-unwind path in cik_init(); the
 * individual fini routines are presumably safe to call on partially
 * initialized state — verify before relying on that.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* rdev->bios is kmalloc'd in radeon_get_bios(); release it here */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8751
8752 void dce8_program_fmt(struct drm_encoder *encoder)
8753 {
8754         struct drm_device *dev = encoder->dev;
8755         struct radeon_device *rdev = dev->dev_private;
8756         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8757         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8758         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8759         int bpc = 0;
8760         u32 tmp = 0;
8761         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8762
8763         if (connector) {
8764                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8765                 bpc = radeon_get_monitor_bpc(connector);
8766                 dither = radeon_connector->dither;
8767         }
8768
8769         /* LVDS/eDP FMT is set up by atom */
8770         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8771                 return;
8772
8773         /* not needed for analog */
8774         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8775             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8776                 return;
8777
8778         if (bpc == 0)
8779                 return;
8780
8781         switch (bpc) {
8782         case 6:
8783                 if (dither == RADEON_FMT_DITHER_ENABLE)
8784                         /* XXX sort out optimal dither settings */
8785                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8787                 else
8788                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8789                 break;
8790         case 8:
8791                 if (dither == RADEON_FMT_DITHER_ENABLE)
8792                         /* XXX sort out optimal dither settings */
8793                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8794                                 FMT_RGB_RANDOM_ENABLE |
8795                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8796                 else
8797                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8798                 break;
8799         case 10:
8800                 if (dither == RADEON_FMT_DITHER_ENABLE)
8801                         /* XXX sort out optimal dither settings */
8802                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8803                                 FMT_RGB_RANDOM_ENABLE |
8804                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8805                 else
8806                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8807                 break;
8808         default:
8809                 /* not needed */
8810                 break;
8811         }
8812
8813         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8814 }
8815
8816 /* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
        u32 tmp, buffer_alloc, i;
        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controllers.
         * There are 3 partitions per LB. Select the number of partitions
         * to enable based on the display width.  For display widths larger
         * than 4096, you need use to use 2 display controllers and combine
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
                /* tmp selects the LB_MEMORY_CONFIG partitioning; wider modes
                 * need more of the line buffer (and more DMIF buffers).
                 */
                if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
        } else {
                /* crtc disabled: minimal LB config, no DMIF buffers */
                tmp = 1;
                buffer_alloc = 0;
        }

        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
        /* wait (bounded by usec_timeout) for the hw to ack the allocation */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
                        break;
                udelay(1);
        }

        if (radeon_crtc->base.enabled && mode) {
                /* return the LB size in pixels for the chosen config */
                switch (tmp) {
                case 0:
                default:
                        return 4096 * 2;
                case 1:
                        return 1920 * 2;
                case 2:
                        return 2560 * 2;
                }
        }

        /* controller not enabled, so no lb used */
        return 0;
}
8890
8891 /**
8892  * cik_get_number_of_dram_channels - get the number of dram channels
8893  *
8894  * @rdev: radeon_device pointer
8895  *
8896  * Look up the number of video ram channels (CIK).
8897  * Used for display watermark bandwidth calculations
8898  * Returns the number of dram channels
8899  */
8900 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8901 {
8902         u32 tmp = RREG32(MC_SHARED_CHMAP);
8903
8904         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8905         case 0:
8906         default:
8907                 return 1;
8908         case 1:
8909                 return 2;
8910         case 2:
8911                 return 4;
8912         case 3:
8913                 return 8;
8914         case 4:
8915                 return 3;
8916         case 5:
8917                 return 6;
8918         case 6:
8919                 return 10;
8920         case 7:
8921                 return 12;
8922         case 8:
8923                 return 16;
8924         }
8925 }
8926
/* dce8_wm_params - input parameters for the dce8 display watermark
 * calculations below; the derived bandwidths are in MBytes/s.
 */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8942
8943 /**
8944  * dce8_dram_bandwidth - get the dram bandwidth
8945  *
8946  * @wm: watermark calculation data
8947  *
8948  * Calculate the raw dram bandwidth (CIK).
8949  * Used for display watermark bandwidth calculations
8950  * Returns the dram bandwidth in MBytes/s
8951  */
8952 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8953 {
8954         /* Calculate raw DRAM Bandwidth */
8955         fixed20_12 dram_efficiency; /* 0.7 */
8956         fixed20_12 yclk, dram_channels, bandwidth;
8957         fixed20_12 a;
8958
8959         a.full = dfixed_const(1000);
8960         yclk.full = dfixed_const(wm->yclk);
8961         yclk.full = dfixed_div(yclk, a);
8962         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963         a.full = dfixed_const(10);
8964         dram_efficiency.full = dfixed_const(7);
8965         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8966         bandwidth.full = dfixed_mul(dram_channels, yclk);
8967         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8968
8969         return dfixed_trunc(bandwidth);
8970 }
8971
8972 /**
8973  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8974  *
8975  * @wm: watermark calculation data
8976  *
8977  * Calculate the dram bandwidth used for display (CIK).
8978  * Used for display watermark bandwidth calculations
8979  * Returns the dram bandwidth for display in MBytes/s
8980  */
8981 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8982 {
8983         /* Calculate DRAM Bandwidth and the part allocated to display. */
8984         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8985         fixed20_12 yclk, dram_channels, bandwidth;
8986         fixed20_12 a;
8987
8988         a.full = dfixed_const(1000);
8989         yclk.full = dfixed_const(wm->yclk);
8990         yclk.full = dfixed_div(yclk, a);
8991         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8992         a.full = dfixed_const(10);
8993         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8994         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8995         bandwidth.full = dfixed_mul(dram_channels, yclk);
8996         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8997
8998         return dfixed_trunc(bandwidth);
8999 }
9000
9001 /**
9002  * dce8_data_return_bandwidth - get the data return bandwidth
9003  *
9004  * @wm: watermark calculation data
9005  *
9006  * Calculate the data return bandwidth used for display (CIK).
9007  * Used for display watermark bandwidth calculations
9008  * Returns the data return bandwidth in MBytes/s
9009  */
9010 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9011 {
9012         /* Calculate the display Data return Bandwidth */
9013         fixed20_12 return_efficiency; /* 0.8 */
9014         fixed20_12 sclk, bandwidth;
9015         fixed20_12 a;
9016
9017         a.full = dfixed_const(1000);
9018         sclk.full = dfixed_const(wm->sclk);
9019         sclk.full = dfixed_div(sclk, a);
9020         a.full = dfixed_const(10);
9021         return_efficiency.full = dfixed_const(8);
9022         return_efficiency.full = dfixed_div(return_efficiency, a);
9023         a.full = dfixed_const(32);
9024         bandwidth.full = dfixed_mul(a, sclk);
9025         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9026
9027         return dfixed_trunc(bandwidth);
9028 }
9029
9030 /**
9031  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9032  *
9033  * @wm: watermark calculation data
9034  *
9035  * Calculate the dmif bandwidth used for display (CIK).
9036  * Used for display watermark bandwidth calculations
9037  * Returns the dmif bandwidth in MBytes/s
9038  */
9039 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9040 {
9041         /* Calculate the DMIF Request Bandwidth */
9042         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9043         fixed20_12 disp_clk, bandwidth;
9044         fixed20_12 a, b;
9045
9046         a.full = dfixed_const(1000);
9047         disp_clk.full = dfixed_const(wm->disp_clk);
9048         disp_clk.full = dfixed_div(disp_clk, a);
9049         a.full = dfixed_const(32);
9050         b.full = dfixed_mul(a, disp_clk);
9051
9052         a.full = dfixed_const(10);
9053         disp_clk_request_efficiency.full = dfixed_const(8);
9054         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9055
9056         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9057
9058         return dfixed_trunc(bandwidth);
9059 }
9060
9061 /**
9062  * dce8_available_bandwidth - get the min available bandwidth
9063  *
9064  * @wm: watermark calculation data
9065  *
9066  * Calculate the min available bandwidth used for display (CIK).
9067  * Used for display watermark bandwidth calculations
9068  * Returns the min available bandwidth in MBytes/s
9069  */
9070 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9071 {
9072         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9073         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9074         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9075         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9076
9077         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9078 }
9079
9080 /**
9081  * dce8_average_bandwidth - get the average available bandwidth
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Calculate the average available bandwidth used for display (CIK).
9086  * Used for display watermark bandwidth calculations
9087  * Returns the average available bandwidth in MBytes/s
9088  */
9089 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9090 {
9091         /* Calculate the display mode Average Bandwidth
9092          * DisplayMode should contain the source and destination dimensions,
9093          * timing, etc.
9094          */
9095         fixed20_12 bpp;
9096         fixed20_12 line_time;
9097         fixed20_12 src_width;
9098         fixed20_12 bandwidth;
9099         fixed20_12 a;
9100
9101         a.full = dfixed_const(1000);
9102         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9103         line_time.full = dfixed_div(line_time, a);
9104         bpp.full = dfixed_const(wm->bytes_per_pixel);
9105         src_width.full = dfixed_const(wm->src_width);
9106         bandwidth.full = dfixed_mul(src_width, bpp);
9107         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9108         bandwidth.full = dfixed_div(bandwidth, line_time);
9109
9110         return dfixed_trunc(bandwidth);
9111 }
9112
9113 /**
9114  * dce8_latency_watermark - get the latency watermark
9115  *
9116  * @wm: watermark calculation data
9117  *
9118  * Calculate the latency watermark (CIK).
9119  * Used for display watermark bandwidth calculations
9120  * Returns the latency watermark in ns
9121  */
9122 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9123 {
9124         /* First calculate the latency in ns */
9125         u32 mc_latency = 2000; /* 2000 ns. */
9126         u32 available_bandwidth = dce8_available_bandwidth(wm);
9127         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9128         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9129         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9130         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9131                 (wm->num_heads * cursor_line_pair_return_time);
9132         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9133         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9134         u32 tmp, dmif_size = 12288;
9135         fixed20_12 a, b, c;
9136
9137         if (wm->num_heads == 0)
9138                 return 0;
9139
9140         a.full = dfixed_const(2);
9141         b.full = dfixed_const(1);
9142         if ((wm->vsc.full > a.full) ||
9143             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9144             (wm->vtaps >= 5) ||
9145             ((wm->vsc.full >= a.full) && wm->interlaced))
9146                 max_src_lines_per_dst_line = 4;
9147         else
9148                 max_src_lines_per_dst_line = 2;
9149
9150         a.full = dfixed_const(available_bandwidth);
9151         b.full = dfixed_const(wm->num_heads);
9152         a.full = dfixed_div(a, b);
9153         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9154         tmp = min(dfixed_trunc(a), tmp);
9155
9156         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9157
9158         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9159         b.full = dfixed_const(1000);
9160         c.full = dfixed_const(lb_fill_bw);
9161         b.full = dfixed_div(c, b);
9162         a.full = dfixed_div(a, b);
9163         line_fill_time = dfixed_trunc(a);
9164
9165         if (line_fill_time < wm->active_time)
9166                 return latency;
9167         else
9168                 return latency + (line_fill_time - wm->active_time);
9169
9170 }
9171
9172 /**
9173  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9174  * average and available dram bandwidth
9175  *
9176  * @wm: watermark calculation data
9177  *
9178  * Check if the display average bandwidth fits in the display
9179  * dram bandwidth (CIK).
9180  * Used for display watermark bandwidth calculations
9181  * Returns true if the display fits, false if not.
9182  */
9183 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9184 {
9185         if (dce8_average_bandwidth(wm) <=
9186             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9187                 return true;
9188         else
9189                 return false;
9190 }
9191
9192 /**
9193  * dce8_average_bandwidth_vs_available_bandwidth - check
9194  * average and available bandwidth
9195  *
9196  * @wm: watermark calculation data
9197  *
9198  * Check if the display average bandwidth fits in the display
9199  * available bandwidth (CIK).
9200  * Used for display watermark bandwidth calculations
9201  * Returns true if the display fits, false if not.
9202  */
9203 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9204 {
9205         if (dce8_average_bandwidth(wm) <=
9206             (dce8_available_bandwidth(wm) / wm->num_heads))
9207                 return true;
9208         else
9209                 return false;
9210 }
9211
9212 /**
9213  * dce8_check_latency_hiding - check latency hiding
9214  *
9215  * @wm: watermark calculation data
9216  *
9217  * Check latency hiding (CIK).
9218  * Used for display watermark bandwidth calculations
9219  * Returns true if the display fits, false if not.
9220  */
9221 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9222 {
9223         u32 lb_partitions = wm->lb_size / wm->src_width;
9224         u32 line_time = wm->active_time + wm->blank_time;
9225         u32 latency_tolerant_lines;
9226         u32 latency_hiding;
9227         fixed20_12 a;
9228
9229         a.full = dfixed_const(1);
9230         if (wm->vsc.full > a.full)
9231                 latency_tolerant_lines = 1;
9232         else {
9233                 if (lb_partitions <= (wm->vtaps + 1))
9234                         latency_tolerant_lines = 1;
9235                 else
9236                         latency_tolerant_lines = 2;
9237         }
9238
9239         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9240
9241         if (dce8_latency_watermark(wm) <= latency_hiding)
9242                 return true;
9243         else
9244                 return false;
9245 }
9246
9247 /**
9248  * dce8_program_watermarks - program display watermarks
9249  *
9250  * @rdev: radeon_device pointer
9251  * @radeon_crtc: the selected display controller
9252  * @lb_size: line buffer size
9253  * @num_heads: number of display controllers in use
9254  *
9255  * Calculate and program the display watermarks for the
9256  * selected display controller (CIK).
9257  */
9258 static void dce8_program_watermarks(struct radeon_device *rdev,
9259                                     struct radeon_crtc *radeon_crtc,
9260                                     u32 lb_size, u32 num_heads)
9261 {
9262         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9263         struct dce8_wm_params wm_low, wm_high;
9264         u32 active_time;
9265         u32 line_time = 0;
9266         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9267         u32 tmp, wm_mask;
9268
9269         if (radeon_crtc->base.enabled && num_heads && mode) {
9270                 active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
9271                 line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
9272
9273                 /* watermark for high clocks */
9274                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9275                     rdev->pm.dpm_enabled) {
9276                         wm_high.yclk =
9277                                 radeon_dpm_get_mclk(rdev, false) * 10;
9278                         wm_high.sclk =
9279                                 radeon_dpm_get_sclk(rdev, false) * 10;
9280                 } else {
9281                         wm_high.yclk = rdev->pm.current_mclk * 10;
9282                         wm_high.sclk = rdev->pm.current_sclk * 10;
9283                 }
9284
9285                 wm_high.disp_clk = mode->clock;
9286                 wm_high.src_width = mode->crtc_hdisplay;
9287                 wm_high.active_time = active_time;
9288                 wm_high.blank_time = line_time - wm_high.active_time;
9289                 wm_high.interlaced = false;
9290                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9291                         wm_high.interlaced = true;
9292                 wm_high.vsc = radeon_crtc->vsc;
9293                 wm_high.vtaps = 1;
9294                 if (radeon_crtc->rmx_type != RMX_OFF)
9295                         wm_high.vtaps = 2;
9296                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9297                 wm_high.lb_size = lb_size;
9298                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9299                 wm_high.num_heads = num_heads;
9300
9301                 /* set for high clocks */
9302                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9303
9304                 /* possibly force display priority to high */
9305                 /* should really do this at mode validation time... */
9306                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9307                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9308                     !dce8_check_latency_hiding(&wm_high) ||
9309                     (rdev->disp_priority == 2)) {
9310                         DRM_DEBUG_KMS("force priority to high\n");
9311                 }
9312
9313                 /* watermark for low clocks */
9314                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9315                     rdev->pm.dpm_enabled) {
9316                         wm_low.yclk =
9317                                 radeon_dpm_get_mclk(rdev, true) * 10;
9318                         wm_low.sclk =
9319                                 radeon_dpm_get_sclk(rdev, true) * 10;
9320                 } else {
9321                         wm_low.yclk = rdev->pm.current_mclk * 10;
9322                         wm_low.sclk = rdev->pm.current_sclk * 10;
9323                 }
9324
9325                 wm_low.disp_clk = mode->clock;
9326                 wm_low.src_width = mode->crtc_hdisplay;
9327                 wm_low.active_time = active_time;
9328                 wm_low.blank_time = line_time - wm_low.active_time;
9329                 wm_low.interlaced = false;
9330                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9331                         wm_low.interlaced = true;
9332                 wm_low.vsc = radeon_crtc->vsc;
9333                 wm_low.vtaps = 1;
9334                 if (radeon_crtc->rmx_type != RMX_OFF)
9335                         wm_low.vtaps = 2;
9336                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9337                 wm_low.lb_size = lb_size;
9338                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9339                 wm_low.num_heads = num_heads;
9340
9341                 /* set for low clocks */
9342                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9343
9344                 /* possibly force display priority to high */
9345                 /* should really do this at mode validation time... */
9346                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9347                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9348                     !dce8_check_latency_hiding(&wm_low) ||
9349                     (rdev->disp_priority == 2)) {
9350                         DRM_DEBUG_KMS("force priority to high\n");
9351                 }
9352
9353                 /* Save number of lines the linebuffer leads before the scanout */
9354                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9355         }
9356
9357         /* select wm A */
9358         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9359         tmp = wm_mask;
9360         tmp &= ~LATENCY_WATERMARK_MASK(3);
9361         tmp |= LATENCY_WATERMARK_MASK(1);
9362         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9363         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9364                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9365                 LATENCY_HIGH_WATERMARK(line_time)));
9366         /* select wm B */
9367         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9368         tmp &= ~LATENCY_WATERMARK_MASK(3);
9369         tmp |= LATENCY_WATERMARK_MASK(2);
9370         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9371         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9372                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9373                 LATENCY_HIGH_WATERMARK(line_time)));
9374         /* restore original selection */
9375         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9376
9377         /* save values for DPM */
9378         radeon_crtc->line_time = line_time;
9379         radeon_crtc->wm_high = latency_watermark_a;
9380         radeon_crtc->wm_low = latency_watermark_b;
9381 }
9382
9383 /**
9384  * dce8_bandwidth_update - program display watermarks
9385  *
9386  * @rdev: radeon_device pointer
9387  *
9388  * Calculate and program the display watermarks and line
9389  * buffer allocation (CIK).
9390  */
9391 void dce8_bandwidth_update(struct radeon_device *rdev)
9392 {
9393         struct drm_display_mode *mode = NULL;
9394         u32 num_heads = 0, lb_size;
9395         int i;
9396
9397         if (!rdev->mode_info.mode_config_initialized)
9398                 return;
9399
9400         radeon_update_display_priority(rdev);
9401
9402         for (i = 0; i < rdev->num_crtc; i++) {
9403                 if (rdev->mode_info.crtcs[i]->base.enabled)
9404                         num_heads++;
9405         }
9406         for (i = 0; i < rdev->num_crtc; i++) {
9407                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9408                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9409                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9410         }
9411 }
9412
9413 /**
9414  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9415  *
9416  * @rdev: radeon_device pointer
9417  *
9418  * Fetches a GPU clock counter snapshot (SI).
9419  * Returns the 64 bit clock counter snapshot.
9420  */
9421 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9422 {
9423         uint64_t clock;
9424
9425         mutex_lock(&rdev->gpu_clock_mutex);
9426         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9427         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9428                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9429         mutex_unlock(&rdev->gpu_clock_mutex);
9430         return clock;
9431 }
9432
9433 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9434                              u32 cntl_reg, u32 status_reg)
9435 {
9436         int r, i;
9437         struct atom_clock_dividers dividers;
9438         uint32_t tmp;
9439
9440         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9441                                            clock, false, &dividers);
9442         if (r)
9443                 return r;
9444
9445         tmp = RREG32_SMC(cntl_reg);
9446         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9447         tmp |= dividers.post_divider;
9448         WREG32_SMC(cntl_reg, tmp);
9449
9450         for (i = 0; i < 100; i++) {
9451                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9452                         break;
9453                 mdelay(10);
9454         }
9455         if (i == 100)
9456                 return -ETIMEDOUT;
9457
9458         return 0;
9459 }
9460
9461 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9462 {
9463         int r = 0;
9464
9465         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9466         if (r)
9467                 return r;
9468
9469         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9470         return r;
9471 }
9472
9473 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9474 {
9475         int r, i;
9476         struct atom_clock_dividers dividers;
9477         u32 tmp;
9478
9479         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9480                                            ecclk, false, &dividers);
9481         if (r)
9482                 return r;
9483
9484         for (i = 0; i < 100; i++) {
9485                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9486                         break;
9487                 mdelay(10);
9488         }
9489         if (i == 100)
9490                 return -ETIMEDOUT;
9491
9492         tmp = RREG32_SMC(CG_ECLK_CNTL);
9493         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9494         tmp |= dividers.post_divider;
9495         WREG32_SMC(CG_ECLK_CNTL, tmp);
9496
9497         for (i = 0; i < 100; i++) {
9498                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9499                         break;
9500                 mdelay(10);
9501         }
9502         if (i == 100)
9503                 return -ETIMEDOUT;
9504
9505         return 0;
9506 }
9507
9508 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9509 {
9510         struct pci_dev *root = rdev->pdev->bus->self;
9511         int bridge_pos, gpu_pos;
9512         u32 speed_cntl, mask, current_data_rate;
9513         int ret, i;
9514         u16 tmp16;
9515
9516         if (pci_is_root_bus(rdev->pdev->bus))
9517                 return;
9518
9519         if (radeon_pcie_gen2 == 0)
9520                 return;
9521
9522         if (rdev->flags & RADEON_IS_IGP)
9523                 return;
9524
9525         if (!(rdev->flags & RADEON_IS_PCIE))
9526                 return;
9527
9528         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9529         if (ret != 0)
9530                 return;
9531
9532         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9533                 return;
9534
9535         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9536         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9537                 LC_CURRENT_DATA_RATE_SHIFT;
9538         if (mask & DRM_PCIE_SPEED_80) {
9539                 if (current_data_rate == 2) {
9540                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9541                         return;
9542                 }
9543                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9544         } else if (mask & DRM_PCIE_SPEED_50) {
9545                 if (current_data_rate == 1) {
9546                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9547                         return;
9548                 }
9549                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9550         }
9551
9552         bridge_pos = pci_pcie_cap(root);
9553         if (!bridge_pos)
9554                 return;
9555
9556         gpu_pos = pci_pcie_cap(rdev->pdev);
9557         if (!gpu_pos)
9558                 return;
9559
9560         if (mask & DRM_PCIE_SPEED_80) {
9561                 /* re-try equalization if gen3 is not already enabled */
9562                 if (current_data_rate != 2) {
9563                         u16 bridge_cfg, gpu_cfg;
9564                         u16 bridge_cfg2, gpu_cfg2;
9565                         u32 max_lw, current_lw, tmp;
9566
9567                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9568                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9569
9570                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9571                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9572
9573                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9574                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9575
9576                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9577                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9578                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9579
9580                         if (current_lw < max_lw) {
9581                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9582                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9583                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9584                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9585                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9586                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9587                                 }
9588                         }
9589
9590                         for (i = 0; i < 10; i++) {
9591                                 /* check status */
9592                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9593                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9594                                         break;
9595
9596                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9597                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9598
9599                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9600                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9601
9602                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9603                                 tmp |= LC_SET_QUIESCE;
9604                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9605
9606                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9607                                 tmp |= LC_REDO_EQ;
9608                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9609
9610                                 mdelay(100);
9611
9612                                 /* linkctl */
9613                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9614                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9615                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9616                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9617
9618                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9619                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9620                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9621                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9622
9623                                 /* linkctl2 */
9624                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9625                                 tmp16 &= ~((1 << 4) | (7 << 9));
9626                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9627                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9628
9629                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9630                                 tmp16 &= ~((1 << 4) | (7 << 9));
9631                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9632                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9633
9634                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9635                                 tmp &= ~LC_SET_QUIESCE;
9636                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9637                         }
9638                 }
9639         }
9640
9641         /* set the link speed */
9642         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9643         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9644         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9645
9646         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9647         tmp16 &= ~0xf;
9648         if (mask & DRM_PCIE_SPEED_80)
9649                 tmp16 |= 3; /* gen3 */
9650         else if (mask & DRM_PCIE_SPEED_50)
9651                 tmp16 |= 2; /* gen2 */
9652         else
9653                 tmp16 |= 1; /* gen1 */
9654         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9655
9656         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9657         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9658         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9659
9660         for (i = 0; i < rdev->usec_timeout; i++) {
9661                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9662                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9663                         break;
9664                 udelay(1);
9665         }
9666 }
9667
/**
 * cik_program_aspm - program PCIE ASPM-related registers (CIK)
 * @rdev: radeon_device pointer
 *
 * Configures the link-controller registers that govern Active State
 * Power Management: L0s/L1 inactivity timers, PLL power-down while in
 * L1, dynamic lane power state, and (when the upstream bridge supports
 * CLKPM) the clock-request / clock-select registers.  All programming
 * is done as read-modify-write with a write only when the value
 * actually changes.
 *
 * Does nothing when ASPM is disabled via the radeon.aspm module
 * parameter, on IGP parts, or on non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        /* Local policy flags; all hard-coded to "enabled" here.  Kept as
         * variables so individual features can be switched off easily. */
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        /* honor radeon.aspm=0 on the kernel command line */
        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* override the number of fast training sequences transmitted
         * (0x24) -- NOTE(review): value presumably from AMD register
         * programming guide; confirm against hardware docs */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* set up L0s/L1 inactivity timers; LC_PMI_TO_L1_DIS is set here
         * and cleared again below when L1 is left enabled */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* allow the PLLs to power down in L1/TXS2 on both
                         * PIF blocks (value 7 in each power-state field) */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /* CLKREQ# is only usable if the upstream bridge
                         * advertises clock power management in its link
                         * capabilities; a root-bus device has no bridge */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* switch the thermal monitor and misc clocks
                                 * to alternate sources so the reference clock
                                 * can be gated -- NOTE(review): exact clock
                                 * mux semantics not visible here; values
                                 * presumably from AMD programming guide */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: commit the timer/PMI setup from above */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        /* enable light sleep for the BIF memories */
        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* if the link partner advertises the maximum N_FTS and the
                 * link is reversed in both directions, drop the L0s
                 * inactivity timer back to 0 (disable L0s entry) */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}