]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/radeon/cik.c
Merge branch 'i2c/for-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa...
[karo-tx-linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
57
58 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
67
68 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
69 MODULE_FIRMWARE("radeon/hawaii_me.bin");
70 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
72 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
74 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
75 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
77
78 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
84
85 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
86 MODULE_FIRMWARE("radeon/kaveri_me.bin");
87 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
88 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
89 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
90 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
91 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
92
93 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
94 MODULE_FIRMWARE("radeon/KABINI_me.bin");
95 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
96 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
97 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
98 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
99
100 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
101 MODULE_FIRMWARE("radeon/kabini_me.bin");
102 MODULE_FIRMWARE("radeon/kabini_ce.bin");
103 MODULE_FIRMWARE("radeon/kabini_mec.bin");
104 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
105 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
106
107 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
113
114 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
115 MODULE_FIRMWARE("radeon/mullins_me.bin");
116 MODULE_FIRMWARE("radeon/mullins_ce.bin");
117 MODULE_FIRMWARE("radeon/mullins_mec.bin");
118 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
119 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
120
121 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
122 extern void r600_ih_ring_fini(struct radeon_device *rdev);
123 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
124 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
125 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
126 extern void sumo_rlc_fini(struct radeon_device *rdev);
127 extern int sumo_rlc_init(struct radeon_device *rdev);
128 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
129 extern void si_rlc_reset(struct radeon_device *rdev);
130 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
131 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
132 extern int cik_sdma_resume(struct radeon_device *rdev);
133 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
134 extern void cik_sdma_fini(struct radeon_device *rdev);
135 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
136 static void cik_rlc_stop(struct radeon_device *rdev);
137 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
138 static void cik_program_aspm(struct radeon_device *rdev);
139 static void cik_init_pg(struct radeon_device *rdev);
140 static void cik_init_cg(struct radeon_device *rdev);
141 static void cik_fini_pg(struct radeon_device *rdev);
142 static void cik_fini_cg(struct radeon_device *rdev);
143 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
144                                           bool enable);
145
/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	/*
	 * Whitelist of read-only status registers userspace may query
	 * through the info ioctl; anything not listed is rejected so
	 * arbitrary MMIO reads are not exposed.
	 */
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
178
/*
 * Indirect registers accessor
 */
/* Read a DIDT register via the CIK_DIDT_IND_INDEX/DATA indirect pair. */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	/* the index/data pair is shared state; serialize and block IRQs
	 * so a concurrent accessor cannot clobber the index in between */
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}
193
/* Write a DIDT register via the CIK_DIDT_IND_INDEX/DATA indirect pair. */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	/* serialize index+data accesses against other DIDT users */
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
203
204 /* get temperature in millidegrees */
205 int ci_get_temp(struct radeon_device *rdev)
206 {
207         u32 temp;
208         int actual_temp = 0;
209
210         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
211                 CTF_TEMP_SHIFT;
212
213         if (temp & 0x200)
214                 actual_temp = 255;
215         else
216                 actual_temp = temp & 0x1ff;
217
218         actual_temp = actual_temp * 1000;
219
220         return actual_temp;
221 }
222
223 /* get temperature in millidegrees */
224 int kv_get_temp(struct radeon_device *rdev)
225 {
226         u32 temp;
227         int actual_temp = 0;
228
229         temp = RREG32_SMC(0xC0300E0C);
230
231         if (temp)
232                 actual_temp = (temp / 8) - 49;
233         else
234                 actual_temp = 0;
235
236         actual_temp = actual_temp * 1000;
237
238         return actual_temp;
239 }
240
/*
 * Indirect registers accessor
 */
/* Read a PCIE port register via the PCIE_INDEX/DATA indirect pair. */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	/* serialize accesses to the shared index/data pair */
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read back the index register — presumably flushes the posted
	 * write so the index is latched before the data read; common
	 * radeon indirect-access pattern */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
256
/* Write a PCIE port register via the PCIE_INDEX/DATA indirect pair. */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	/* serialize accesses to the shared index/data pair */
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* read back after each write — presumably flushes the posted
	 * write so index and data land in order; common radeon pattern */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
268
/*
 * RLC save/restore register list for Spectre (Kaveri-class GFX7) parts.
 * NOTE(review): layout appears to be pairs of
 *   ((SE/SH/instance select) << 16) | (register byte offset >> 2)
 * each followed by a reserved 0x00000000 slot, with bare small values
 * (0x3, 0x5 below) delimiting sections consumed by the RLC ucode —
 * format inferred from the entry shape, confirm against RLC docs.
 * Do not edit values; they must match what the RLC firmware expects.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* section marker — see NOTE above */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* section marker — see NOTE above; following entries have no 0 slot */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
715
716 static const u32 kalindi_rlc_save_restore_register_list[] =
717 {
718         (0x0e00 << 16) | (0xc12c >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc140 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc150 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc15c >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc168 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc170 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc204 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b4 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2b8 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2bc >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc2c0 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x8228 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x829c >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x869c >> 2),
745         0x00000000,
746         (0x0600 << 16) | (0x98f4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x98f8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x9900 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0xc260 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x90e8 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c000 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x3c00c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x8c1c >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x9700 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x4e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x5e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x6e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x7e00 << 16) | (0xcd20 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x89bc >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0x8900 >> 2),
777         0x00000000,
778         0x3,
779         (0x0e00 << 16) | (0xc130 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc134 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc1fc >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc208 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc264 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc268 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc26c >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc270 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc274 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc28c >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc290 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc294 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc298 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a0 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a4 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2a8 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xc2ac >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x301d0 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30238 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30250 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30254 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x30258 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0x3025c >> 2),
824         0x00000000,
825         (0x4e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x5e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x6e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x7e00 << 16) | (0xc900 >> 2),
832         0x00000000,
833         (0x4e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x5e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x6e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x7e00 << 16) | (0xc904 >> 2),
840         0x00000000,
841         (0x4e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x5e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x6e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x7e00 << 16) | (0xc908 >> 2),
848         0x00000000,
849         (0x4e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x5e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x6e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x7e00 << 16) | (0xc90c >> 2),
856         0x00000000,
857         (0x4e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x5e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x6e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x7e00 << 16) | (0xc910 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0xc99c >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x9834 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f00 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f04 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f08 >> 2),
874         0x00000000,
875         (0x0000 << 16) | (0x30f0c >> 2),
876         0x00000000,
877         (0x0600 << 16) | (0x9b7c >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a14 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x8a18 >> 2),
882         0x00000000,
883         (0x0600 << 16) | (0x30a00 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bf0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8bcc >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x8b24 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x30a04 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a10 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a14 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a18 >> 2),
898         0x00000000,
899         (0x0600 << 16) | (0x30a2c >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc700 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc704 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc708 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0xc768 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc770 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc774 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc798 >> 2),
914         0x00000000,
915         (0x0400 << 16) | (0xc79c >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x9100 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x3c010 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c00 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c04 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c20 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c38 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0x8c3c >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0xae00 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0x9604 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac08 >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac0c >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac10 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac14 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac58 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac68 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac6c >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac70 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac74 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac78 >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac7c >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac80 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac84 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac88 >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0xac8c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x970c >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9714 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x9718 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x971c >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x4e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x5e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x6e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x7e00 << 16) | (0x31068 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd10 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0xcd14 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b0 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b4 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88b8 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88bc >> 2),
994         0x00000000,
995         (0x0400 << 16) | (0x89c0 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c4 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88c8 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d0 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d4 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x88d8 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x8980 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x30938 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x3093c >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x30940 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x89a0 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30900 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x30904 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x89b4 >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3e1fc >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c210 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c214 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x3c218 >> 2),
1030         0x00000000,
1031         (0x0e00 << 16) | (0x8904 >> 2),
1032         0x00000000,
1033         0x5,
1034         (0x0e00 << 16) | (0x8c28 >> 2),
1035         (0x0e00 << 16) | (0x8c2c >> 2),
1036         (0x0e00 << 16) | (0x8c30 >> 2),
1037         (0x0e00 << 16) | (0x8c34 >> 2),
1038         (0x0e00 << 16) | (0x9600 >> 2),
1039 };
1040
/*
 * Bonaire SPM (streaming performance monitor) golden setting.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * NOTE(review): triple semantics inferred from layout — confirm against
 * radeon_program_register_sequence() in radeon_device.c.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1045
/*
 * Bonaire common golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1053
/*
 * Bonaire chip-specific golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Values are vendor-provided tuning defaults; do not edit individual
 * entries without the corresponding register documentation.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1098
/*
 * Bonaire MGCG/CGCG init sequence (clock-gating setup, per the name).
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * NOTE(review): 0x30800 is programmed twice (start and mid-sequence) —
 * presumably deliberate re-broadcast ordering; preserve entry order.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1184
/*
 * Spectre (Kaveri) SPM golden setting.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1189
/*
 * Spectre (Kaveri) common golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1197
/*
 * Spectre (Kaveri) chip-specific golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Vendor-provided tuning defaults; do not edit individual entries
 * without the corresponding register documentation.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1226
/*
 * Spectre (Kaveri) MGCG/CGCG init sequence (clock-gating setup, per the
 * name).  Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Preserve entry order; 0x30800 is intentionally programmed twice.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1317
/*
 * Kalindi (Kabini) SPM golden setting.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Also reused for Mullins (CHIP_MULLINS path).
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1322
/*
 * Kalindi (Kabini) common golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Also reused for Mullins (CHIP_MULLINS path).
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1330
/*
 * Kalindi (Kabini) chip-specific golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Vendor-provided tuning defaults; do not edit individual entries
 * without the corresponding register documentation.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1364
/*
 * Kalindi (Kabini) MGCG/CGCG init sequence (clock-gating setup, per the
 * name).  Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Also reused for Mullins.  Preserve entry order; 0x30800 is
 * intentionally programmed twice.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1423
/*
 * Hawaii SPM golden setting.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1428
/*
 * Hawaii common golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1437
/*
 * Hawaii chip-specific golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Vendor-provided tuning defaults; do not edit individual entries
 * without the corresponding register documentation.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1477
/*
 * Hawaii MGCG/CGCG init sequence (clock-gating setup, per the name).
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Longest of the CIK tables (Hawaii has the most shader engines);
 * preserve entry order — 0x30800 is intentionally programmed twice.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1588
/*
 * Godavari (Mullins) chip-specific golden register settings.
 * Flat {reg offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Vendor-provided tuning defaults; do not edit individual entries
 * without the corresponding register documentation.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * NOTE(review): 0x98302 is suspicious — every sibling table
	 * programs 0x9834 with this same mask/value pair (e.g. the
	 * kalindi table), so this looks like a possible typo for 0x9834.
	 * Left as-is pending verification against the register spec.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1624
1625
1626 static void cik_init_golden_registers(struct radeon_device *rdev)
1627 {
1628         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1629         mutex_lock(&rdev->grbm_idx_mutex);
1630         switch (rdev->family) {
1631         case CHIP_BONAIRE:
1632                 radeon_program_register_sequence(rdev,
1633                                                  bonaire_mgcg_cgcg_init,
1634                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_golden_registers,
1637                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_common_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_spm_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1644                 break;
1645         case CHIP_KABINI:
1646                 radeon_program_register_sequence(rdev,
1647                                                  kalindi_mgcg_cgcg_init,
1648                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_golden_registers,
1651                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_common_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_spm_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1658                 break;
1659         case CHIP_MULLINS:
1660                 radeon_program_register_sequence(rdev,
1661                                                  kalindi_mgcg_cgcg_init,
1662                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1663                 radeon_program_register_sequence(rdev,
1664                                                  godavari_golden_registers,
1665                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1666                 radeon_program_register_sequence(rdev,
1667                                                  kalindi_golden_common_registers,
1668                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_spm_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1672                 break;
1673         case CHIP_KAVERI:
1674                 radeon_program_register_sequence(rdev,
1675                                                  spectre_mgcg_cgcg_init,
1676                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_golden_registers,
1679                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_common_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_spm_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1686                 break;
1687         case CHIP_HAWAII:
1688                 radeon_program_register_sequence(rdev,
1689                                                  hawaii_mgcg_cgcg_init,
1690                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_golden_registers,
1693                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_common_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_spm_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1700                 break;
1701         default:
1702                 break;
1703         }
1704         mutex_unlock(&rdev->grbm_idx_mutex);
1705 }
1706
1707 /**
1708  * cik_get_xclk - get the xclk
1709  *
1710  * @rdev: radeon_device pointer
1711  *
1712  * Returns the reference clock used by the gfx engine
1713  * (CIK).
1714  */
1715 u32 cik_get_xclk(struct radeon_device *rdev)
1716 {
1717         u32 reference_clock = rdev->clock.spll.reference_freq;
1718
1719         if (rdev->flags & RADEON_IS_IGP) {
1720                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1721                         return reference_clock / 2;
1722         } else {
1723                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1724                         return reference_clock / 4;
1725         }
1726         return reference_clock;
1727 }
1728
1729 /**
1730  * cik_mm_rdoorbell - read a doorbell dword
1731  *
1732  * @rdev: radeon_device pointer
1733  * @index: doorbell index
1734  *
1735  * Returns the value in the doorbell aperture at the
1736  * requested doorbell index (CIK).
1737  */
1738 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1739 {
1740         if (index < rdev->doorbell.num_doorbells) {
1741                 return readl(rdev->doorbell.ptr + index);
1742         } else {
1743                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1744                 return 0;
1745         }
1746 }
1747
1748 /**
1749  * cik_mm_wdoorbell - write a doorbell dword
1750  *
1751  * @rdev: radeon_device pointer
1752  * @index: doorbell index
1753  * @v: value to write
1754  *
1755  * Writes @v to the doorbell aperture at the
1756  * requested doorbell index (CIK).
1757  */
1758 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1759 {
1760         if (index < rdev->doorbell.num_doorbells) {
1761                 writel(v, rdev->doorbell.ptr + index);
1762         } else {
1763                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1764         }
1765 }
1766
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO setup table: {MC_SEQ_IO_DEBUG index, value} pairs
 * programmed before the MC ucode is loaded when using legacy
 * (headerless) firmware; see ci_mc_load_microcode().
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1808
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO setup table: {MC_SEQ_IO_DEBUG index, value} pairs
 * programmed before the MC ucode is loaded when using legacy
 * (headerless) firmware; see ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1836
1837
1838 /**
1839  * cik_srbm_select - select specific register instances
1840  *
1841  * @rdev: radeon_device pointer
1842  * @me: selected ME (micro engine)
1843  * @pipe: pipe
1844  * @queue: queue
1845  * @vmid: VMID
1846  *
1847  * Switches the currently active registers instances.  Some
1848  * registers are instanced per VMID, others are instanced per
1849  * me/pipe/queue combination.
1850  */
1851 static void cik_srbm_select(struct radeon_device *rdev,
1852                             u32 me, u32 pipe, u32 queue, u32 vmid)
1853 {
1854         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855                              MEID(me & 0x3) |
1856                              VMID(vmid & 0xf) |
1857                              QUEUEID(queue & 0x7));
1858         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859 }
1860
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * If the MC sequencer is already running, the load is
 * skipped and 0 is returned.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* unified firmware image: the io-debug table and the ucode
		 * are located via offsets in the firmware header
		 */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io-debug entries are {index, data} dword pairs, hence /8 */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: raw big-endian ucode blob, io regs come
		 * from the built-in per-asic tables above
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	/* only (re)load the ucode if the sequencer is not running */
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): board-specific override for device 0x6649 with a
		 * matching MC_SEQ_MISC0 revision; presumably a memory-vendor
		 * quirk -- confirm against the hw docs
		 */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1966
1967 /**
1968  * cik_init_microcode - load ucode images from disk
1969  *
1970  * @rdev: radeon_device pointer
1971  *
1972  * Use the firmware interface to load the ucode images into
1973  * the driver (not loaded into hw).
1974  * Returns 0 on success, error on failure.
1975  */
1976 static int cik_init_microcode(struct radeon_device *rdev)
1977 {
1978         const char *chip_name;
1979         const char *new_chip_name;
1980         size_t pfp_req_size, me_req_size, ce_req_size,
1981                 mec_req_size, rlc_req_size, mc_req_size = 0,
1982                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1983         char fw_name[30];
1984         int new_fw = 0;
1985         int err;
1986         int num_fw;
1987         bool new_smc = false;
1988
1989         DRM_DEBUG("\n");
1990
1991         switch (rdev->family) {
1992         case CHIP_BONAIRE:
1993                 chip_name = "BONAIRE";
1994                 if ((rdev->pdev->revision == 0x80) ||
1995                     (rdev->pdev->revision == 0x81) ||
1996                     (rdev->pdev->device == 0x665f))
1997                         new_smc = true;
1998                 new_chip_name = "bonaire";
1999                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2000                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2001                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2002                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2003                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2004                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2005                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2006                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2007                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2008                 num_fw = 8;
2009                 break;
2010         case CHIP_HAWAII:
2011                 chip_name = "HAWAII";
2012                 if (rdev->pdev->revision == 0x80)
2013                         new_smc = true;
2014                 new_chip_name = "hawaii";
2015                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2017                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2021                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2022                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2024                 num_fw = 8;
2025                 break;
2026         case CHIP_KAVERI:
2027                 chip_name = "KAVERI";
2028                 new_chip_name = "kaveri";
2029                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2030                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2031                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2032                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2033                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2034                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2035                 num_fw = 7;
2036                 break;
2037         case CHIP_KABINI:
2038                 chip_name = "KABINI";
2039                 new_chip_name = "kabini";
2040                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2041                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2042                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2043                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2044                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2045                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2046                 num_fw = 6;
2047                 break;
2048         case CHIP_MULLINS:
2049                 chip_name = "MULLINS";
2050                 new_chip_name = "mullins";
2051                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2052                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2053                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2054                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2055                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2056                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2057                 num_fw = 6;
2058                 break;
2059         default: BUG();
2060         }
2061
2062         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2063
2064         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2065         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2066         if (err) {
2067                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2068                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069                 if (err)
2070                         goto out;
2071                 if (rdev->pfp_fw->size != pfp_req_size) {
2072                         printk(KERN_ERR
2073                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074                                rdev->pfp_fw->size, fw_name);
2075                         err = -EINVAL;
2076                         goto out;
2077                 }
2078         } else {
2079                 err = radeon_ucode_validate(rdev->pfp_fw);
2080                 if (err) {
2081                         printk(KERN_ERR
2082                                "cik_fw: validation failed for firmware \"%s\"\n",
2083                                fw_name);
2084                         goto out;
2085                 } else {
2086                         new_fw++;
2087                 }
2088         }
2089
2090         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092         if (err) {
2093                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095                 if (err)
2096                         goto out;
2097                 if (rdev->me_fw->size != me_req_size) {
2098                         printk(KERN_ERR
2099                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                                rdev->me_fw->size, fw_name);
2101                         err = -EINVAL;
2102                 }
2103         } else {
2104                 err = radeon_ucode_validate(rdev->me_fw);
2105                 if (err) {
2106                         printk(KERN_ERR
2107                                "cik_fw: validation failed for firmware \"%s\"\n",
2108                                fw_name);
2109                         goto out;
2110                 } else {
2111                         new_fw++;
2112                 }
2113         }
2114
2115         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2116         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2117         if (err) {
2118                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2119                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2120                 if (err)
2121                         goto out;
2122                 if (rdev->ce_fw->size != ce_req_size) {
2123                         printk(KERN_ERR
2124                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2125                                rdev->ce_fw->size, fw_name);
2126                         err = -EINVAL;
2127                 }
2128         } else {
2129                 err = radeon_ucode_validate(rdev->ce_fw);
2130                 if (err) {
2131                         printk(KERN_ERR
2132                                "cik_fw: validation failed for firmware \"%s\"\n",
2133                                fw_name);
2134                         goto out;
2135                 } else {
2136                         new_fw++;
2137                 }
2138         }
2139
2140         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2141         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142         if (err) {
2143                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2144                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2145                 if (err)
2146                         goto out;
2147                 if (rdev->mec_fw->size != mec_req_size) {
2148                         printk(KERN_ERR
2149                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2150                                rdev->mec_fw->size, fw_name);
2151                         err = -EINVAL;
2152                 }
2153         } else {
2154                 err = radeon_ucode_validate(rdev->mec_fw);
2155                 if (err) {
2156                         printk(KERN_ERR
2157                                "cik_fw: validation failed for firmware \"%s\"\n",
2158                                fw_name);
2159                         goto out;
2160                 } else {
2161                         new_fw++;
2162                 }
2163         }
2164
2165         if (rdev->family == CHIP_KAVERI) {
2166                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2167                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2168                 if (err) {
2169                         goto out;
2170                 } else {
2171                         err = radeon_ucode_validate(rdev->mec2_fw);
2172                         if (err) {
2173                                 goto out;
2174                         } else {
2175                                 new_fw++;
2176                         }
2177                 }
2178         }
2179
2180         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2181         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2182         if (err) {
2183                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2184                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2185                 if (err)
2186                         goto out;
2187                 if (rdev->rlc_fw->size != rlc_req_size) {
2188                         printk(KERN_ERR
2189                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2190                                rdev->rlc_fw->size, fw_name);
2191                         err = -EINVAL;
2192                 }
2193         } else {
2194                 err = radeon_ucode_validate(rdev->rlc_fw);
2195                 if (err) {
2196                         printk(KERN_ERR
2197                                "cik_fw: validation failed for firmware \"%s\"\n",
2198                                fw_name);
2199                         goto out;
2200                 } else {
2201                         new_fw++;
2202                 }
2203         }
2204
2205         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2206         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2207         if (err) {
2208                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2209                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2210                 if (err)
2211                         goto out;
2212                 if (rdev->sdma_fw->size != sdma_req_size) {
2213                         printk(KERN_ERR
2214                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2215                                rdev->sdma_fw->size, fw_name);
2216                         err = -EINVAL;
2217                 }
2218         } else {
2219                 err = radeon_ucode_validate(rdev->sdma_fw);
2220                 if (err) {
2221                         printk(KERN_ERR
2222                                "cik_fw: validation failed for firmware \"%s\"\n",
2223                                fw_name);
2224                         goto out;
2225                 } else {
2226                         new_fw++;
2227                 }
2228         }
2229
2230         /* No SMC, MC ucode on APUs */
2231         if (!(rdev->flags & RADEON_IS_IGP)) {
2232                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2233                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2234                 if (err) {
2235                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2236                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2237                         if (err) {
2238                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2239                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2240                                 if (err)
2241                                         goto out;
2242                         }
2243                         if ((rdev->mc_fw->size != mc_req_size) &&
2244                             (rdev->mc_fw->size != mc2_req_size)){
2245                                 printk(KERN_ERR
2246                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2247                                        rdev->mc_fw->size, fw_name);
2248                                 err = -EINVAL;
2249                         }
2250                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2251                 } else {
2252                         err = radeon_ucode_validate(rdev->mc_fw);
2253                         if (err) {
2254                                 printk(KERN_ERR
2255                                        "cik_fw: validation failed for firmware \"%s\"\n",
2256                                        fw_name);
2257                                 goto out;
2258                         } else {
2259                                 new_fw++;
2260                         }
2261                 }
2262
2263                 if (new_smc)
2264                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2265                 else
2266                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2267                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2268                 if (err) {
2269                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2270                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2271                         if (err) {
2272                                 printk(KERN_ERR
2273                                        "smc: error loading firmware \"%s\"\n",
2274                                        fw_name);
2275                                 release_firmware(rdev->smc_fw);
2276                                 rdev->smc_fw = NULL;
2277                                 err = 0;
2278                         } else if (rdev->smc_fw->size != smc_req_size) {
2279                                 printk(KERN_ERR
2280                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281                                        rdev->smc_fw->size, fw_name);
2282                                 err = -EINVAL;
2283                         }
2284                 } else {
2285                         err = radeon_ucode_validate(rdev->smc_fw);
2286                         if (err) {
2287                                 printk(KERN_ERR
2288                                        "cik_fw: validation failed for firmware \"%s\"\n",
2289                                        fw_name);
2290                                 goto out;
2291                         } else {
2292                                 new_fw++;
2293                         }
2294                 }
2295         }
2296
2297         if (new_fw == 0) {
2298                 rdev->new_fw = false;
2299         } else if (new_fw < num_fw) {
2300                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2301                 err = -EINVAL;
2302         } else {
2303                 rdev->new_fw = true;
2304         }
2305
2306 out:
2307         if (err) {
2308                 if (err != -EINVAL)
2309                         printk(KERN_ERR
2310                                "cik_cp: Failed to load firmware \"%s\"\n",
2311                                fw_name);
2312                 release_firmware(rdev->pfp_fw);
2313                 rdev->pfp_fw = NULL;
2314                 release_firmware(rdev->me_fw);
2315                 rdev->me_fw = NULL;
2316                 release_firmware(rdev->ce_fw);
2317                 rdev->ce_fw = NULL;
2318                 release_firmware(rdev->mec_fw);
2319                 rdev->mec_fw = NULL;
2320                 release_firmware(rdev->mec2_fw);
2321                 rdev->mec2_fw = NULL;
2322                 release_firmware(rdev->rlc_fw);
2323                 rdev->rlc_fw = NULL;
2324                 release_firmware(rdev->sdma_fw);
2325                 rdev->sdma_fw = NULL;
2326                 release_firmware(rdev->mc_fw);
2327                 rdev->mc_fw = NULL;
2328                 release_firmware(rdev->smc_fw);
2329                 rdev->smc_fw = NULL;
2330         }
2331         return err;
2332 }
2333
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350         u32 *tile = rdev->config.cik.tile_mode_array;
2351         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352         const u32 num_tile_mode_states =
2353                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354         const u32 num_secondary_tile_mode_states =
2355                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356         u32 reg_offset, split_equal_to_row_size;
2357         u32 num_pipe_configs;
2358         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359                 rdev->config.cik.max_shader_engines;
2360
2361         switch (rdev->config.cik.mem_row_size_in_kb) {
2362         case 1:
2363                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364                 break;
2365         case 2:
2366         default:
2367                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368                 break;
2369         case 4:
2370                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371                 break;
2372         }
2373
2374         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375         if (num_pipe_configs > 8)
2376                 num_pipe_configs = 16;
2377
2378         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379                 tile[reg_offset] = 0;
2380         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381                 macrotile[reg_offset] = 0;
2382
2383         switch(num_pipe_configs) {
2384         case 16:
2385                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            TILE_SPLIT(split_equal_to_row_size));
2405                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                            TILE_SPLIT(split_equal_to_row_size));
2416                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463
2464                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK));
2468                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK));
2472                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_8_BANK));
2484                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_4_BANK));
2488                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                            NUM_BANKS(ADDR_SURF_2_BANK));
2492                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                            NUM_BANKS(ADDR_SURF_16_BANK));
2496                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                            NUM_BANKS(ADDR_SURF_16_BANK));
2500                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_16_BANK));
2504                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                             NUM_BANKS(ADDR_SURF_8_BANK));
2508                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                             NUM_BANKS(ADDR_SURF_4_BANK));
2512                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                             NUM_BANKS(ADDR_SURF_2_BANK));
2516                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                             NUM_BANKS(ADDR_SURF_2_BANK));
2520
2521                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525                 break;
2526
2527         case 8:
2528                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            TILE_SPLIT(split_equal_to_row_size));
2548                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                            TILE_SPLIT(split_equal_to_row_size));
2559                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606
2607                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_8_BANK));
2627                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_4_BANK));
2631                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                 NUM_BANKS(ADDR_SURF_2_BANK));
2635                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_8_BANK));
2655                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_4_BANK));
2659                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_2_BANK));
2663
2664                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668                 break;
2669
2670         case 4:
2671                 if (num_rbs == 4) {
2672                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            TILE_SPLIT(split_equal_to_row_size));
2692                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                            TILE_SPLIT(split_equal_to_row_size));
2703                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750
2751                 } else if (num_rbs < 4) {
2752                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            TILE_SPLIT(split_equal_to_row_size));
2772                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                            TILE_SPLIT(split_equal_to_row_size));
2783                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 }
2831
2832                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                 NUM_BANKS(ADDR_SURF_4_BANK));
2860                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_8_BANK));
2884                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887                                 NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893                 break;
2894
2895         case 2:
2896                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914                            PIPE_CONFIG(ADDR_SURF_P2) |
2915                            TILE_SPLIT(split_equal_to_row_size));
2916                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                            PIPE_CONFIG(ADDR_SURF_P2) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P2) |
2922                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925                            PIPE_CONFIG(ADDR_SURF_P2) |
2926                            TILE_SPLIT(split_equal_to_row_size));
2927                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                            PIPE_CONFIG(ADDR_SURF_P2);
2929                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931                            PIPE_CONFIG(ADDR_SURF_P2));
2932                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961                             PIPE_CONFIG(ADDR_SURF_P2));
2962                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                             PIPE_CONFIG(ADDR_SURF_P2) |
2965                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                             PIPE_CONFIG(ADDR_SURF_P2) |
2969                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                             PIPE_CONFIG(ADDR_SURF_P2) |
2973                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974
2975                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                 NUM_BANKS(ADDR_SURF_8_BANK));
3003                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030                                 NUM_BANKS(ADDR_SURF_8_BANK));
3031
3032                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036                 break;
3037
3038         default:
3039                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040         }
3041 }
3042
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055                              u32 se_num, u32 sh_num)
3056 {
3057         u32 data = INSTANCE_BROADCAST_WRITES;
3058
3059         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061         else if (se_num == 0xffffffff)
3062                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063         else if (sh_num == 0xffffffff)
3064                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065         else
3066                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067         WREG32(GRBM_GFX_INDEX, data);
3068 }
3069
3070 /**
3071  * cik_create_bitmask - create a bitmask
3072  *
3073  * @bit_width: length of the mask
3074  *
3075  * create a variable length bit mask (CIK).
3076  * Returns the bitmask.
3077  */
3078 static u32 cik_create_bitmask(u32 bit_width)
3079 {
3080         u32 i, mask = 0;
3081
3082         for (i = 0; i < bit_width; i++) {
3083                 mask <<= 1;
3084                 mask |= 1;
3085         }
3086         return mask;
3087 }
3088
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per shader engine
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask for the currently selected SE/SH
 * (the caller selects via cik_select_se_sh() before calling).
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* NOTE(review): bit 0 appears to gate whether the hardware
	 * disable field in CC_RB_BACKEND_DISABLE is valid; when it is
	 * clear the register is ignored entirely — confirm against the
	 * register spec.
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in the user/driver-requested (harvest) disable bits */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* keep only the bits for the RBs owned by a single SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3119
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK): reads the disabled-RB
 * state from every SE/SH, derives the enabled-RB mask, and programs
 * PA_SC_RASTER_CONFIG for each SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: visit every SE/SH pair and pack its disabled-RB bits
	 * into one flat bitmask.  Hawaii reserves more bits per SH than
	 * the other CIK parts (HAWAII_ vs CIK_RB_BITMAP_WIDTH_PER_SH).
	 * grbm_idx_mutex serializes use of the GRBM_GFX_INDEX selector.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing before releasing the selector */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the mask: a set bit in enabled_rbs means that RB is
	 * present and usable.
	 */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* remember the enabled-RB mask for later consumers */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE, consuming two
	 * enabled-RB bits per SH iteration.  The 2-bit value selects
	 * which RASTER_CONFIG_RB_MAP_* routing to use.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this SH */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3192
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
3203         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204         u32 mc_shared_chmap, mc_arb_ramcfg;
3205         u32 hdp_host_path_cntl;
3206         u32 tmp;
3207         int i, j;
3208
3209         switch (rdev->family) {
3210         case CHIP_BONAIRE:
3211                 rdev->config.cik.max_shader_engines = 2;
3212                 rdev->config.cik.max_tile_pipes = 4;
3213                 rdev->config.cik.max_cu_per_sh = 7;
3214                 rdev->config.cik.max_sh_per_se = 1;
3215                 rdev->config.cik.max_backends_per_se = 2;
3216                 rdev->config.cik.max_texture_channel_caches = 4;
3217                 rdev->config.cik.max_gprs = 256;
3218                 rdev->config.cik.max_gs_threads = 32;
3219                 rdev->config.cik.max_hw_contexts = 8;
3220
3221                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226                 break;
3227         case CHIP_HAWAII:
3228                 rdev->config.cik.max_shader_engines = 4;
3229                 rdev->config.cik.max_tile_pipes = 16;
3230                 rdev->config.cik.max_cu_per_sh = 11;
3231                 rdev->config.cik.max_sh_per_se = 1;
3232                 rdev->config.cik.max_backends_per_se = 4;
3233                 rdev->config.cik.max_texture_channel_caches = 16;
3234                 rdev->config.cik.max_gprs = 256;
3235                 rdev->config.cik.max_gs_threads = 32;
3236                 rdev->config.cik.max_hw_contexts = 8;
3237
3238                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243                 break;
3244         case CHIP_KAVERI:
3245                 rdev->config.cik.max_shader_engines = 1;
3246                 rdev->config.cik.max_tile_pipes = 4;
3247                 if ((rdev->pdev->device == 0x1304) ||
3248                     (rdev->pdev->device == 0x1305) ||
3249                     (rdev->pdev->device == 0x130C) ||
3250                     (rdev->pdev->device == 0x130F) ||
3251                     (rdev->pdev->device == 0x1310) ||
3252                     (rdev->pdev->device == 0x1311) ||
3253                     (rdev->pdev->device == 0x131C)) {
3254                         rdev->config.cik.max_cu_per_sh = 8;
3255                         rdev->config.cik.max_backends_per_se = 2;
3256                 } else if ((rdev->pdev->device == 0x1309) ||
3257                            (rdev->pdev->device == 0x130A) ||
3258                            (rdev->pdev->device == 0x130D) ||
3259                            (rdev->pdev->device == 0x1313) ||
3260                            (rdev->pdev->device == 0x131D)) {
3261                         rdev->config.cik.max_cu_per_sh = 6;
3262                         rdev->config.cik.max_backends_per_se = 2;
3263                 } else if ((rdev->pdev->device == 0x1306) ||
3264                            (rdev->pdev->device == 0x1307) ||
3265                            (rdev->pdev->device == 0x130B) ||
3266                            (rdev->pdev->device == 0x130E) ||
3267                            (rdev->pdev->device == 0x1315) ||
3268                            (rdev->pdev->device == 0x1318) ||
3269                            (rdev->pdev->device == 0x131B)) {
3270                         rdev->config.cik.max_cu_per_sh = 4;
3271                         rdev->config.cik.max_backends_per_se = 1;
3272                 } else {
3273                         rdev->config.cik.max_cu_per_sh = 3;
3274                         rdev->config.cik.max_backends_per_se = 1;
3275                 }
3276                 rdev->config.cik.max_sh_per_se = 1;
3277                 rdev->config.cik.max_texture_channel_caches = 4;
3278                 rdev->config.cik.max_gprs = 256;
3279                 rdev->config.cik.max_gs_threads = 16;
3280                 rdev->config.cik.max_hw_contexts = 8;
3281
3282                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3283                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3284                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3285                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3286                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3287                 break;
3288         case CHIP_KABINI:
3289         case CHIP_MULLINS:
3290         default:
3291                 rdev->config.cik.max_shader_engines = 1;
3292                 rdev->config.cik.max_tile_pipes = 2;
3293                 rdev->config.cik.max_cu_per_sh = 2;
3294                 rdev->config.cik.max_sh_per_se = 1;
3295                 rdev->config.cik.max_backends_per_se = 1;
3296                 rdev->config.cik.max_texture_channel_caches = 2;
3297                 rdev->config.cik.max_gprs = 256;
3298                 rdev->config.cik.max_gs_threads = 16;
3299                 rdev->config.cik.max_hw_contexts = 8;
3300
3301                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3302                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3303                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3304                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3305                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3306                 break;
3307         }
3308
3309         /* Initialize HDP */
3310         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3311                 WREG32((0x2c14 + j), 0x00000000);
3312                 WREG32((0x2c18 + j), 0x00000000);
3313                 WREG32((0x2c1c + j), 0x00000000);
3314                 WREG32((0x2c20 + j), 0x00000000);
3315                 WREG32((0x2c24 + j), 0x00000000);
3316         }
3317
3318         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3319         WREG32(SRBM_INT_CNTL, 0x1);
3320         WREG32(SRBM_INT_ACK, 0x1);
3321
3322         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3323
3324         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3325         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3326
3327         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3328         rdev->config.cik.mem_max_burst_length_bytes = 256;
3329         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3330         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3331         if (rdev->config.cik.mem_row_size_in_kb > 4)
3332                 rdev->config.cik.mem_row_size_in_kb = 4;
3333         /* XXX use MC settings? */
3334         rdev->config.cik.shader_engine_tile_size = 32;
3335         rdev->config.cik.num_gpus = 1;
3336         rdev->config.cik.multi_gpu_tile_size = 64;
3337
3338         /* fix up row size */
3339         gb_addr_config &= ~ROW_SIZE_MASK;
3340         switch (rdev->config.cik.mem_row_size_in_kb) {
3341         case 1:
3342         default:
3343                 gb_addr_config |= ROW_SIZE(0);
3344                 break;
3345         case 2:
3346                 gb_addr_config |= ROW_SIZE(1);
3347                 break;
3348         case 4:
3349                 gb_addr_config |= ROW_SIZE(2);
3350                 break;
3351         }
3352
3353         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3354          * not have bank info, so create a custom tiling dword.
3355          * bits 3:0   num_pipes
3356          * bits 7:4   num_banks
3357          * bits 11:8  group_size
3358          * bits 15:12 row_size
3359          */
3360         rdev->config.cik.tile_config = 0;
3361         switch (rdev->config.cik.num_tile_pipes) {
3362         case 1:
3363                 rdev->config.cik.tile_config |= (0 << 0);
3364                 break;
3365         case 2:
3366                 rdev->config.cik.tile_config |= (1 << 0);
3367                 break;
3368         case 4:
3369                 rdev->config.cik.tile_config |= (2 << 0);
3370                 break;
3371         case 8:
3372         default:
3373                 /* XXX what about 12? */
3374                 rdev->config.cik.tile_config |= (3 << 0);
3375                 break;
3376         }
3377         rdev->config.cik.tile_config |=
3378                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3379         rdev->config.cik.tile_config |=
3380                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3381         rdev->config.cik.tile_config |=
3382                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3383
3384         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3385         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3386         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3387         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3388         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3389         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3390         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3391         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3392
3393         cik_tiling_mode_table_init(rdev);
3394
3395         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3396                      rdev->config.cik.max_sh_per_se,
3397                      rdev->config.cik.max_backends_per_se);
3398
3399         rdev->config.cik.active_cus = 0;
3400         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3401                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3402                         rdev->config.cik.active_cus +=
3403                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3404                 }
3405         }
3406
3407         /* set HW defaults for 3D engine */
3408         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3409
3410         mutex_lock(&rdev->grbm_idx_mutex);
3411         /*
3412          * making sure that the following register writes will be broadcasted
3413          * to all the shaders
3414          */
3415         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3416         WREG32(SX_DEBUG_1, 0x20);
3417
3418         WREG32(TA_CNTL_AUX, 0x00010000);
3419
3420         tmp = RREG32(SPI_CONFIG_CNTL);
3421         tmp |= 0x03000000;
3422         WREG32(SPI_CONFIG_CNTL, tmp);
3423
3424         WREG32(SQ_CONFIG, 1);
3425
3426         WREG32(DB_DEBUG, 0);
3427
3428         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3429         tmp |= 0x00000400;
3430         WREG32(DB_DEBUG2, tmp);
3431
3432         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3433         tmp |= 0x00020200;
3434         WREG32(DB_DEBUG3, tmp);
3435
3436         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3437         tmp |= 0x00018208;
3438         WREG32(CB_HW_CONTROL, tmp);
3439
3440         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3441
3442         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3443                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3444                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3445                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3446
3447         WREG32(VGT_NUM_INSTANCES, 1);
3448
3449         WREG32(CP_PERFMON_CNTL, 0);
3450
3451         WREG32(SQ_CONFIG, 0);
3452
3453         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3454                                           FORCE_EOV_MAX_REZ_CNT(255)));
3455
3456         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3457                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3458
3459         WREG32(VGT_GS_VERTEX_REUSE, 16);
3460         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3461
3462         tmp = RREG32(HDP_MISC_CNTL);
3463         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3464         WREG32(HDP_MISC_CNTL, tmp);
3465
3466         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3467         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3468
3469         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3470         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3471         mutex_unlock(&rdev->grbm_idx_mutex);
3472
3473         udelay(50);
3474 }
3475
3476 /*
3477  * GPU scratch registers helpers function.
3478  */
3479 /**
3480  * cik_scratch_init - setup driver info for CP scratch regs
3481  *
3482  * @rdev: radeon_device pointer
3483  *
3484  * Set up the number and offset of the CP scratch registers.
3485  * NOTE: use of CP scratch registers is a legacy inferface and
3486  * is not used by default on newer asics (r6xx+).  On newer asics,
3487  * memory buffers are used for fences rather than scratch regs.
3488  */
3489 static void cik_scratch_init(struct radeon_device *rdev)
3490 {
3491         int i;
3492
3493         rdev->scratch.num_reg = 7;
3494         rdev->scratch.reg_base = SCRATCH_REG0;
3495         for (i = 0; i < rdev->scratch.num_reg; i++) {
3496                 rdev->scratch.free[i] = true;
3497                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3498         }
3499 }
3500
3501 /**
3502  * cik_ring_test - basic gfx ring test
3503  *
3504  * @rdev: radeon_device pointer
3505  * @ring: radeon_ring structure holding ring information
3506  *
3507  * Allocate a scratch register and write to it using the gfx ring (CIK).
3508  * Provides a basic gfx ring test to verify that the ring is working.
3509  * Used by cik_cp_gfx_resume();
3510  * Returns 0 on success, error on failure.
3511  */
3512 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3513 {
3514         uint32_t scratch;
3515         uint32_t tmp = 0;
3516         unsigned i;
3517         int r;
3518
3519         r = radeon_scratch_get(rdev, &scratch);
3520         if (r) {
3521                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3522                 return r;
3523         }
3524         WREG32(scratch, 0xCAFEDEAD);
3525         r = radeon_ring_lock(rdev, ring, 3);
3526         if (r) {
3527                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3528                 radeon_scratch_free(rdev, scratch);
3529                 return r;
3530         }
3531         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3532         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3533         radeon_ring_write(ring, 0xDEADBEEF);
3534         radeon_ring_unlock_commit(rdev, ring, false);
3535
3536         for (i = 0; i < rdev->usec_timeout; i++) {
3537                 tmp = RREG32(scratch);
3538                 if (tmp == 0xDEADBEEF)
3539                         break;
3540                 DRM_UDELAY(1);
3541         }
3542         if (i < rdev->usec_timeout) {
3543                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3544         } else {
3545                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3546                           ring->idx, scratch, tmp);
3547                 r = -EINVAL;
3548         }
3549         radeon_scratch_free(rdev, scratch);
3550         return r;
3551 }
3552
3553 /**
3554  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3555  *
3556  * @rdev: radeon_device pointer
3557  * @ridx: radeon ring index
3558  *
3559  * Emits an hdp flush on the cp.
3560  */
3561 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3562                                        int ridx)
3563 {
3564         struct radeon_ring *ring = &rdev->ring[ridx];
3565         u32 ref_and_mask;
3566
3567         switch (ring->idx) {
3568         case CAYMAN_RING_TYPE_CP1_INDEX:
3569         case CAYMAN_RING_TYPE_CP2_INDEX:
3570         default:
3571                 switch (ring->me) {
3572                 case 0:
3573                         ref_and_mask = CP2 << ring->pipe;
3574                         break;
3575                 case 1:
3576                         ref_and_mask = CP6 << ring->pipe;
3577                         break;
3578                 default:
3579                         return;
3580                 }
3581                 break;
3582         case RADEON_RING_TYPE_GFX_INDEX:
3583                 ref_and_mask = CP0;
3584                 break;
3585         }
3586
3587         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3588         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3589                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3590                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3591         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3592         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3593         radeon_ring_write(ring, ref_and_mask);
3594         radeon_ring_write(ring, ref_and_mask);
3595         radeon_ring_write(ring, 0x20); /* poll interval */
3596 }
3597
3598 /**
3599  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3600  *
3601  * @rdev: radeon_device pointer
3602  * @fence: radeon fence object
3603  *
3604  * Emits a fence sequnce number on the gfx ring and flushes
3605  * GPU caches.
3606  */
3607 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3608                              struct radeon_fence *fence)
3609 {
3610         struct radeon_ring *ring = &rdev->ring[fence->ring];
3611         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3612
3613         /* Workaround for cache flush problems. First send a dummy EOP
3614          * event down the pipe with seq one below.
3615          */
3616         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3617         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3618                                  EOP_TC_ACTION_EN |
3619                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3620                                  EVENT_INDEX(5)));
3621         radeon_ring_write(ring, addr & 0xfffffffc);
3622         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3623                                 DATA_SEL(1) | INT_SEL(0));
3624         radeon_ring_write(ring, fence->seq - 1);
3625         radeon_ring_write(ring, 0);
3626
3627         /* Then send the real EOP event down the pipe. */
3628         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3629         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630                                  EOP_TC_ACTION_EN |
3631                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632                                  EVENT_INDEX(5)));
3633         radeon_ring_write(ring, addr & 0xfffffffc);
3634         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3635         radeon_ring_write(ring, fence->seq);
3636         radeon_ring_write(ring, 0);
3637 }
3638
3639 /**
3640  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3641  *
3642  * @rdev: radeon_device pointer
3643  * @fence: radeon fence object
3644  *
3645  * Emits a fence sequnce number on the compute ring and flushes
3646  * GPU caches.
3647  */
3648 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3649                                  struct radeon_fence *fence)
3650 {
3651         struct radeon_ring *ring = &rdev->ring[fence->ring];
3652         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3653
3654         /* RELEASE_MEM - flush caches, send int */
3655         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3656         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3657                                  EOP_TC_ACTION_EN |
3658                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3659                                  EVENT_INDEX(5)));
3660         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3661         radeon_ring_write(ring, addr & 0xfffffffc);
3662         radeon_ring_write(ring, upper_32_bits(addr));
3663         radeon_ring_write(ring, fence->seq);
3664         radeon_ring_write(ring, 0);
3665 }
3666
3667 /**
3668  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3669  *
3670  * @rdev: radeon_device pointer
3671  * @ring: radeon ring buffer object
3672  * @semaphore: radeon semaphore object
3673  * @emit_wait: Is this a sempahore wait?
3674  *
3675  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3676  * from running ahead of semaphore waits.
3677  */
3678 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3679                              struct radeon_ring *ring,
3680                              struct radeon_semaphore *semaphore,
3681                              bool emit_wait)
3682 {
3683         uint64_t addr = semaphore->gpu_addr;
3684         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3685
3686         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3687         radeon_ring_write(ring, lower_32_bits(addr));
3688         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3689
3690         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3691                 /* Prevent the PFP from running ahead of the semaphore wait */
3692                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3693                 radeon_ring_write(ring, 0x0);
3694         }
3695
3696         return true;
3697 }
3698
3699 /**
3700  * cik_copy_cpdma - copy pages using the CP DMA engine
3701  *
3702  * @rdev: radeon_device pointer
3703  * @src_offset: src GPU address
3704  * @dst_offset: dst GPU address
3705  * @num_gpu_pages: number of GPU pages to xfer
3706  * @resv: reservation object to sync to
3707  *
3708  * Copy GPU paging using the CP DMA engine (CIK+).
3709  * Used by the radeon ttm implementation to move pages if
3710  * registered as the asic copy callback.
3711  */
3712 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3713                                     uint64_t src_offset, uint64_t dst_offset,
3714                                     unsigned num_gpu_pages,
3715                                     struct reservation_object *resv)
3716 {
3717         struct radeon_fence *fence;
3718         struct radeon_sync sync;
3719         int ring_index = rdev->asic->copy.blit_ring_index;
3720         struct radeon_ring *ring = &rdev->ring[ring_index];
3721         u32 size_in_bytes, cur_size_in_bytes, control;
3722         int i, num_loops;
3723         int r = 0;
3724
3725         radeon_sync_create(&sync);
3726
3727         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3728         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3729         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3730         if (r) {
3731                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3732                 radeon_sync_free(rdev, &sync, NULL);
3733                 return ERR_PTR(r);
3734         }
3735
3736         radeon_sync_resv(rdev, &sync, resv, false);
3737         radeon_sync_rings(rdev, &sync, ring->idx);
3738
3739         for (i = 0; i < num_loops; i++) {
3740                 cur_size_in_bytes = size_in_bytes;
3741                 if (cur_size_in_bytes > 0x1fffff)
3742                         cur_size_in_bytes = 0x1fffff;
3743                 size_in_bytes -= cur_size_in_bytes;
3744                 control = 0;
3745                 if (size_in_bytes == 0)
3746                         control |= PACKET3_DMA_DATA_CP_SYNC;
3747                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3748                 radeon_ring_write(ring, control);
3749                 radeon_ring_write(ring, lower_32_bits(src_offset));
3750                 radeon_ring_write(ring, upper_32_bits(src_offset));
3751                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3752                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3753                 radeon_ring_write(ring, cur_size_in_bytes);
3754                 src_offset += cur_size_in_bytes;
3755                 dst_offset += cur_size_in_bytes;
3756         }
3757
3758         r = radeon_fence_emit(rdev, &fence, ring->idx);
3759         if (r) {
3760                 radeon_ring_unlock_undo(rdev, ring);
3761                 radeon_sync_free(rdev, &sync, NULL);
3762                 return ERR_PTR(r);
3763         }
3764
3765         radeon_ring_unlock_commit(rdev, ring, false);
3766         radeon_sync_free(rdev, &sync, fence);
3767
3768         return fence;
3769 }
3770
3771 /*
3772  * IB stuff
3773  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        /* vm id 0 is used when the IB has no VM attached */
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* 3 dwords for this SET_UCONFIG_REG write plus the
                         * 4-dword IB packet emitted at the end below */
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* 5 dwords for this WRITE_DATA packet plus the
                         * 4-dword IB packet emitted at the end below */
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* IB length in dwords in the low bits, vm id in bits 31:24 */
        control |= ib->length_dw | (vm_id << 24);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}
3825
3826 /**
3827  * cik_ib_test - basic gfx ring IB test
3828  *
3829  * @rdev: radeon_device pointer
3830  * @ring: radeon_ring structure holding ring information
3831  *
3832  * Allocate an IB and execute it on the gfx ring (CIK).
3833  * Provides a basic gfx ring test to verify that IBs are working.
3834  * Returns 0 on success, error on failure.
3835  */
3836 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3837 {
3838         struct radeon_ib ib;
3839         uint32_t scratch;
3840         uint32_t tmp = 0;
3841         unsigned i;
3842         int r;
3843
3844         r = radeon_scratch_get(rdev, &scratch);
3845         if (r) {
3846                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3847                 return r;
3848         }
3849         WREG32(scratch, 0xCAFEDEAD);
3850         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3851         if (r) {
3852                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3853                 radeon_scratch_free(rdev, scratch);
3854                 return r;
3855         }
3856         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3857         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3858         ib.ptr[2] = 0xDEADBEEF;
3859         ib.length_dw = 3;
3860         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3861         if (r) {
3862                 radeon_scratch_free(rdev, scratch);
3863                 radeon_ib_free(rdev, &ib);
3864                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3865                 return r;
3866         }
3867         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3868                 RADEON_USEC_IB_TEST_TIMEOUT));
3869         if (r < 0) {
3870                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3871                 radeon_scratch_free(rdev, scratch);
3872                 radeon_ib_free(rdev, &ib);
3873                 return r;
3874         } else if (r == 0) {
3875                 DRM_ERROR("radeon: fence wait timed out.\n");
3876                 radeon_scratch_free(rdev, scratch);
3877                 radeon_ib_free(rdev, &ib);
3878                 return -ETIMEDOUT;
3879         }
3880         r = 0;
3881         for (i = 0; i < rdev->usec_timeout; i++) {
3882                 tmp = RREG32(scratch);
3883                 if (tmp == 0xDEADBEEF)
3884                         break;
3885                 DRM_UDELAY(1);
3886         }
3887         if (i < rdev->usec_timeout) {
3888                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3889         } else {
3890                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3891                           scratch, tmp);
3892                 r = -EINVAL;
3893         }
3894         radeon_scratch_free(rdev, scratch);
3895         radeon_ib_free(rdev, &ib);
3896         return r;
3897 }
3898
3899 /*
3900  * CP.
 * On CIK, gfx and compute now have independent command processors.
3902  *
3903  * GFX
3904  * Gfx consists of a single ring and can process both gfx jobs and
3905  * compute jobs.  The gfx CP consists of three microengines (ME):
3906  * PFP - Pre-Fetch Parser
3907  * ME - Micro Engine
3908  * CE - Constant Engine
3909  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3911  * used by the DE so that they can be loaded into cache in parallel
3912  * while the DE is processing state update packets.
3913  *
3914  * Compute
3915  * The compute CP consists of two microengines (ME):
3916  * MEC1 - Compute MicroEngine 1
3917  * MEC2 - Compute MicroEngine 2
3918  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3919  * The queues are exposed to userspace and are programmed directly
3920  * by the compute runtime.
3921  */
3922 /**
3923  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3924  *
3925  * @rdev: radeon_device pointer
3926  * @enable: enable or disable the MEs
3927  *
3928  * Halts or unhalts the gfx MEs.
3929  */
3930 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3931 {
3932         if (enable)
3933                 WREG32(CP_ME_CNTL, 0);
3934         else {
3935                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3936                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3937                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3938                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3939         }
3940         udelay(50);
3941 }
3942
3943 /**
3944  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3945  *
3946  * @rdev: radeon_device pointer
3947  *
3948  * Loads the gfx PFP, ME, and CE ucode.
3949  * Returns 0 for success, -EINVAL if the ucode is not available.
3950  */
3951 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3952 {
3953         int i;
3954
3955         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3956                 return -EINVAL;
3957
3958         cik_cp_gfx_enable(rdev, false);
3959
3960         if (rdev->new_fw) {
3961                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3962                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3963                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3964                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3965                 const struct gfx_firmware_header_v1_0 *me_hdr =
3966                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3967                 const __le32 *fw_data;
3968                 u32 fw_size;
3969
3970                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3971                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3972                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3973
3974                 /* PFP */
3975                 fw_data = (const __le32 *)
3976                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3977                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3978                 WREG32(CP_PFP_UCODE_ADDR, 0);
3979                 for (i = 0; i < fw_size; i++)
3980                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3981                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3982
3983                 /* CE */
3984                 fw_data = (const __le32 *)
3985                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3986                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3987                 WREG32(CP_CE_UCODE_ADDR, 0);
3988                 for (i = 0; i < fw_size; i++)
3989                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3990                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3991
3992                 /* ME */
3993                 fw_data = (const __be32 *)
3994                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3995                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3996                 WREG32(CP_ME_RAM_WADDR, 0);
3997                 for (i = 0; i < fw_size; i++)
3998                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3999                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4000                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4001         } else {
4002                 const __be32 *fw_data;
4003
4004                 /* PFP */
4005                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4006                 WREG32(CP_PFP_UCODE_ADDR, 0);
4007                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4008                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4009                 WREG32(CP_PFP_UCODE_ADDR, 0);
4010
4011                 /* CE */
4012                 fw_data = (const __be32 *)rdev->ce_fw->data;
4013                 WREG32(CP_CE_UCODE_ADDR, 0);
4014                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4015                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4016                 WREG32(CP_CE_UCODE_ADDR, 0);
4017
4018                 /* ME */
4019                 fw_data = (const __be32 *)rdev->me_fw->data;
4020                 WREG32(CP_ME_RAM_WADDR, 0);
4021                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4022                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4023                 WREG32(CP_ME_RAM_WADDR, 0);
4024         }
4025
4026         return 0;
4027 }
4028
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* init the CP */
        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
        WREG32(CP_ENDIAN_SWAP, 0);
        WREG32(CP_DEVICE_ID, 1);

        cik_cp_gfx_enable(rdev, true);

        /* 17 fixed dwords are emitted below in addition to the
         * cik_default_size dwords of clear state */
        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* init the CE partitions.  CE only used for gfx on CIK */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0x8000);
        radeon_ring_write(ring, 0x8000);

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        radeon_ring_write(ring, 0x80000000);
        radeon_ring_write(ring, 0x80000000);

        /* stream the cik_default_state table into the ring */
        for (i = 0; i < cik_default_size; i++)
                radeon_ring_write(ring, cik_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

        radeon_ring_unlock_commit(rdev, ring, false);

        return 0;
}
4089
4090 /**
4091  * cik_cp_gfx_fini - stop the gfx ring
4092  *
4093  * @rdev: radeon_device pointer
4094  *
4095  * Stop the gfx ring and tear down the driver ring
4096  * info.
4097  */
4098 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4099 {
4100         cik_cp_gfx_enable(rdev, false);
4101         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102 }
4103
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr;
        int r;

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        if (rdev->family != CHIP_HAWAII)
                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(CP_RB_VMID, 0);

        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        /* scratch register shadowing is no longer supported */
        WREG32(SCRATCH_UMSK, 0);

        /* keep the CP from updating the rptr copy when wb is off */
        if (!rdev->wb.enabled)
                tmp |= RB_NO_UPDATE;

        mdelay(1);
        /* final CNTL value, written without RB_RPTR_WR_ENA */
        WREG32(CP_RB0_CNTL, tmp);

        /* ring base address is in units of 256 bytes (addr >> 8) */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(CP_RB0_BASE, rb_addr);
        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

        /* start the ring */
        cik_cp_gfx_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                return r;
        }

        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
4179
4180 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4181                      struct radeon_ring *ring)
4182 {
4183         u32 rptr;
4184
4185         if (rdev->wb.enabled)
4186                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4187         else
4188                 rptr = RREG32(CP_RB0_RPTR);
4189
4190         return rptr;
4191 }
4192
4193 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4194                      struct radeon_ring *ring)
4195 {
4196         return RREG32(CP_RB0_WPTR);
4197 }
4198
4199 void cik_gfx_set_wptr(struct radeon_device *rdev,
4200                       struct radeon_ring *ring)
4201 {
4202         WREG32(CP_RB0_WPTR, ring->wptr);
4203         (void)RREG32(CP_RB0_WPTR);
4204 }
4205
4206 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4207                          struct radeon_ring *ring)
4208 {
4209         u32 rptr;
4210
4211         if (rdev->wb.enabled) {
4212                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4213         } else {
4214                 mutex_lock(&rdev->srbm_mutex);
4215                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4216                 rptr = RREG32(CP_HQD_PQ_RPTR);
4217                 cik_srbm_select(rdev, 0, 0, 0, 0);
4218                 mutex_unlock(&rdev->srbm_mutex);
4219         }
4220
4221         return rptr;
4222 }
4223
4224 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4225                          struct radeon_ring *ring)
4226 {
4227         u32 wptr;
4228
4229         if (rdev->wb.enabled) {
4230                 /* XXX check if swapping is necessary on BE */
4231                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4232         } else {
4233                 mutex_lock(&rdev->srbm_mutex);
4234                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4235                 wptr = RREG32(CP_HQD_PQ_WPTR);
4236                 cik_srbm_select(rdev, 0, 0, 0, 0);
4237                 mutex_unlock(&rdev->srbm_mutex);
4238         }
4239
4240         return wptr;
4241 }
4242
4243 void cik_compute_set_wptr(struct radeon_device *rdev,
4244                           struct radeon_ring *ring)
4245 {
4246         /* XXX check if swapping is necessary on BE */
4247         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4248         WDOORBELL32(ring->doorbell_index, ring->wptr);
4249 }
4250
/*
 * Quiesce one compute HQD before the MEC is halted.
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable()).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* point SRBM-indexed registers at this queue's me/pipe/queue */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* ask the CP to drain the queue, then wait for it to go idle */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		/* clear the request and reset the queue pointers */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4275
4276 /**
4277  * cik_cp_compute_enable - enable/disable the compute CP MEs
4278  *
4279  * @rdev: radeon_device pointer
4280  * @enable: enable or disable the MEs
4281  *
4282  * Halts or unhalts the compute MEs.
4283  */
4284 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4285 {
4286         if (enable)
4287                 WREG32(CP_MEC_CNTL, 0);
4288         else {
4289                 /*
4290                  * To make hibernation reliable we need to clear compute ring
4291                  * configuration before halting the compute ring.
4292                  */
4293                 mutex_lock(&rdev->srbm_mutex);
4294                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4295                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4296                 mutex_unlock(&rdev->srbm_mutex);
4297
4298                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4299                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4300                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4301         }
4302         udelay(50);
4303 }
4304
4305 /**
4306  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Loads the compute MEC1&2 ucode.
4311  * Returns 0 for success, -EINVAL if the ucode is not available.
4312  */
4313 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4314 {
4315         int i;
4316
4317         if (!rdev->mec_fw)
4318                 return -EINVAL;
4319
4320         cik_cp_compute_enable(rdev, false);
4321
4322         if (rdev->new_fw) {
4323                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4324                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4325                 const __le32 *fw_data;
4326                 u32 fw_size;
4327
4328                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4329
4330                 /* MEC1 */
4331                 fw_data = (const __le32 *)
4332                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4333                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4334                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335                 for (i = 0; i < fw_size; i++)
4336                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4337                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4338
4339                 /* MEC2 */
4340                 if (rdev->family == CHIP_KAVERI) {
4341                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4342                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4343
4344                         fw_data = (const __le32 *)
4345                                 (rdev->mec2_fw->data +
4346                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4347                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4348                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4349                         for (i = 0; i < fw_size; i++)
4350                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4351                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4352                 }
4353         } else {
4354                 const __be32 *fw_data;
4355
4356                 /* MEC1 */
4357                 fw_data = (const __be32 *)rdev->mec_fw->data;
4358                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4359                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4360                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4361                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4362
4363                 if (rdev->family == CHIP_KAVERI) {
4364                         /* MEC2 */
4365                         fw_data = (const __be32 *)rdev->mec_fw->data;
4366                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4367                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4368                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4369                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4370                 }
4371         }
4372
4373         return 0;
4374 }
4375
4376 /**
4377  * cik_cp_compute_start - start the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Enable the compute queues.
4382  * Returns 0 for success, error for failure.
4383  */
4384 static int cik_cp_compute_start(struct radeon_device *rdev)
4385 {
4386         cik_cp_compute_enable(rdev, true);
4387
4388         return 0;
4389 }
4390
4391 /**
4392  * cik_cp_compute_fini - stop the compute queues
4393  *
4394  * @rdev: radeon_device pointer
4395  *
4396  * Stop the compute queues and tear down the driver queue
4397  * info.
4398  */
4399 static void cik_cp_compute_fini(struct radeon_device *rdev)
4400 {
4401         int i, idx, r;
4402
4403         cik_cp_compute_enable(rdev, false);
4404
4405         for (i = 0; i < 2; i++) {
4406                 if (i == 0)
4407                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408                 else
4409                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410
4411                 if (rdev->ring[idx].mqd_obj) {
4412                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413                         if (unlikely(r != 0))
4414                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415
4416                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418
4419                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420                         rdev->ring[idx].mqd_obj = NULL;
4421                 }
4422         }
4423 }
4424
4425 static void cik_mec_fini(struct radeon_device *rdev)
4426 {
4427         int r;
4428
4429         if (rdev->mec.hpd_eop_obj) {
4430                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431                 if (unlikely(r != 0))
4432                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435
4436                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437                 rdev->mec.hpd_eop_obj = NULL;
4438         }
4439 }
4440
4441 #define MEC_HPD_SIZE 2048
4442
/*
 * Allocate, pin and zero the HPD EOP buffer used by the MEC.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE*2 slab per mec/pipe, pinned in GTT */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		/*
		 * NOTE(review): the bo is still reserved here, and
		 * cik_mec_fini() calls radeon_bo_reserve() again on the
		 * same bo — verify this error path cannot self-deadlock.
		 */
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		/* NOTE(review): same double-reserve concern as the pin path */
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4497
/*
 * CPU-side shadow of the per-queue HQD register block, one u32 per
 * register, named after the CP_MQD_*/CP_HQD_* registers programmed in
 * cik_cp_compute_resume().  This struct is embedded in the MQD that is
 * written to GPU-visible memory — presumably the CP microcode reads it
 * with this exact layout, so do not reorder or resize fields (TODO:
 * confirm against the MQD spec).
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4536
/*
 * Memory Queue Descriptor (MQD) for Bonaire-class compute queues.
 * One of these is allocated per compute ring in a GTT bo and filled in
 * by cik_cp_compute_resume(); the queue_state member holds the HQD
 * register shadow.  Layout is consumed by hardware/microcode — do not
 * reorder fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4564
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	/* NOTE(review): bit 23's meaning is not documented here — keep as-is */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD bo for this queue (freed in
		 * cik_cp_compute_fini()) */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* all HQD register accesses below are SRBM-indexed, so
		 * select this queue and hold srbm_mutex throughout */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait for the drain to complete, up to usec_timeout */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size in log2(dwords/8); rptr block from GPU page size */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark ready, then drop it again if the ring test fails */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4805
4806 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4807 {
4808         cik_cp_gfx_enable(rdev, enable);
4809         cik_cp_compute_enable(rdev, enable);
4810 }
4811
/*
 * Load microcode for the gfx CP first, then the compute MECs.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4825
/* Tear down both the gfx CP and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4831
4832 static int cik_cp_resume(struct radeon_device *rdev)
4833 {
4834         int r;
4835
4836         cik_enable_gui_idle_interrupt(rdev, false);
4837
4838         r = cik_cp_load_microcode(rdev);
4839         if (r)
4840                 return r;
4841
4842         r = cik_cp_gfx_resume(rdev);
4843         if (r)
4844                 return r;
4845         r = cik_cp_compute_resume(rdev);
4846         if (r)
4847                 return r;
4848
4849         cik_enable_gui_idle_interrupt(rdev, true);
4850
4851         return 0;
4852 }
4853
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Used for debugging before a GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* SDMA1 shares SDMA0's register layout at an instance offset */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4893
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: graphics pipeline busy bits */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: SDMA reports idle, not busy */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG: same register at the SDMA1 instance offset */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system blocks (IH, semaphores, GRBM, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4974
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (RADEON_RESET_* flags)
 *
 * Soft reset the blocks specified in @reset_mask.  The engines are
 * halted and the MC stopped first, then the selected reset bits are
 * asserted and deasserted in GRBM_SOFT_RESET/SRBM_SOFT_RESET, and
 * finally the MC is resumed.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state to the log to help post-mortem debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the sdma engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset is only attempted on dGPUs; skipped on IGPs —
	 * NOTE(review): presumably because the IGP MC is shared with the
	 * CPU, confirm against asic docs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert the GRBM resets, read back to post the write, hold for
	 * 50us, then deassert */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/deassert dance for the SRBM resets */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5105
/* GMCON register state saved across a KV/KB pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5111
/*
 * kv_save_regs_for_reset - save GMCON state before an IGP pci config reset
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3 into @save, then
 * clears the RENG execute-on-power-up / execute-on-reg-update triggers
 * and the stutter enable bit.  The saved values are written back by
 * kv_restore_regs_for_reset() after the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* snapshot current GMCON state */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* disable automatic RENG execution and stutter mode for the reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5123
/*
 * kv_restore_regs_for_reset - restore GMCON state after an IGP pci config reset
 *
 * Replays a fixed GMCON PGFSM write/config command sequence (each
 * write/config pair is followed by five dummy writes, except the last),
 * then restores the GMCON registers saved by kv_save_regs_for_reset().
 *
 * NOTE(review): the PGFSM values below are opaque magic numbers
 * (presumably a hardware-team-provided init sequence); do not reorder
 * or coalesce these writes.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* finally put back the state saved before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5196
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, stops the memory controller and disables bus
 * mastering, then resets the GPU through the PCI config space and
 * waits for CONFIG_MEMSIZE to read back a sane value.  On IGPs the
 * GMCON state is saved before and restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs need GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads
	 * 0xffffffff while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5259
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force hard reset (pci config reset) instead of trying a
 *        soft reset first
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the BIOS the engine is hung so it can account for it */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	/* re-check: did the soft reset clear the hang? */
	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* only clear the hung flag if everything is idle now */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5300
5301 /**
5302  * cik_gfx_is_lockup - check if the 3D engine is locked up
5303  *
5304  * @rdev: radeon_device pointer
5305  * @ring: radeon_ring structure holding ring information
5306  *
5307  * Check if the 3D engine is locked up (CIK).
5308  * Returns true if the engine is locked, false if not.
5309  */
5310 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5311 {
5312         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5313
5314         if (!(reset_mask & (RADEON_RESET_GFX |
5315                             RADEON_RESET_COMPUTE |
5316                             RADEON_RESET_CP))) {
5317                 radeon_ring_lockup_update(rdev, ring);
5318                 return false;
5319         }
5320         return radeon_ring_test_lockup(rdev, ring);
5321 }
5322
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): raw offsets 0x2c14..0x2c24 have no named defines
	 * in cikd.h — presumably HDP surface registers, confirm against
	 * the register spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: vram end in the high 16 bits, start in the low,
	 * both in units of 16MB (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP base/top/bottom — TOP == BOT presumably disables the AGP
	 * aperture, TODO confirm */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5379
5380 /**
5381  * cik_mc_init - initialize the memory controller driver params
5382  *
5383  * @rdev: radeon_device pointer
5384  *
5385  * Look up the amount of vram, vram width, and decide how to place
5386  * vram and gart within the GPU's physical address space (CIK).
5387  * Returns 0 for success.
5388  */
5389 static int cik_mc_init(struct radeon_device *rdev)
5390 {
5391         u32 tmp;
5392         int chansize, numchan;
5393
5394         /* Get VRAM informations */
5395         rdev->mc.vram_is_ddr = true;
5396         tmp = RREG32(MC_ARB_RAMCFG);
5397         if (tmp & CHANSIZE_MASK) {
5398                 chansize = 64;
5399         } else {
5400                 chansize = 32;
5401         }
5402         tmp = RREG32(MC_SHARED_CHMAP);
5403         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5404         case 0:
5405         default:
5406                 numchan = 1;
5407                 break;
5408         case 1:
5409                 numchan = 2;
5410                 break;
5411         case 2:
5412                 numchan = 4;
5413                 break;
5414         case 3:
5415                 numchan = 8;
5416                 break;
5417         case 4:
5418                 numchan = 3;
5419                 break;
5420         case 5:
5421                 numchan = 6;
5422                 break;
5423         case 6:
5424                 numchan = 10;
5425                 break;
5426         case 7:
5427                 numchan = 12;
5428                 break;
5429         case 8:
5430                 numchan = 16;
5431                 break;
5432         }
5433         rdev->mc.vram_width = numchan * chansize;
5434         /* Could aper size report 0 ? */
5435         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5436         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5437         /* size in MB on si */
5438         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5439         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5440         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5441         si_vram_gtt_location(rdev, &rdev->mc);
5442         radeon_update_bandwidth_info(rdev);
5443
5444         return 0;
5445 }
5446
5447 /*
5448  * GART
5449  * VMID 0 is the physical GPU addresses as used by the kernel.
5450  * VMIDs 1-15 are used for userspace clients and are handled
5451  * by the radeon vm/hsa code.
5452  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 * The HDP cache is flushed first so in-flight writes reach memory
 * before the TLBs are invalidated.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only flush context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5468
5469 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5470 {
5471         int i;
5472         uint32_t sh_mem_bases, sh_mem_config;
5473
5474         sh_mem_bases = 0x6000 | 0x6000 << 16;
5475         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5476         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5477
5478         mutex_lock(&rdev->srbm_mutex);
5479         for (i = 8; i < 16; i++) {
5480                 cik_srbm_select(rdev, 0, 0, 0, i);
5481                 /* CP and shaders */
5482                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5483                 WREG32(SH_MEM_APE1_BASE, 1);
5484                 WREG32(SH_MEM_APE1_LIMIT, 0);
5485                 WREG32(SH_MEM_BASES, sh_mem_bases);
5486         }
5487         cik_srbm_select(rdev, 0, 0, 0, 0);
5488         mutex_unlock(&rdev->srbm_mutex);
5489 }
5490
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	/* NOTE(review): (0xA << 7) is an unnamed field in
	 * MC_VM_MX_L1_TLB_CNTL — confirm meaning against register spec */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's GART mapping */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw offsets 0x15D4-0x15DC have no named defines
	 * in cikd.h — purpose unclear, confirm against register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* page table bases saved by cik_pcie_gart_disable() are put back;
	 * contexts 0-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5613
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).  The per-VM page table base
 * addresses are saved first so cik_pcie_gart_enable() can restore them.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save page table bases of contexts 1-15; the two register banks
	 * cover contexts 0-7 and 8-15 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5652
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the page
 * tables, frees the GART table VRAM and the GART structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5666
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5680
5681 /*
5682  * vm
5683  * VMID 0 is the physical GPU addresses as used by the kernel.
5684  * VMIDs 1-15 are used for userspace clients and are handled
5685  * by the radeon vm/hsa code.
5686  */
5687 /**
5688  * cik_vm_init - cik vm init callback
5689  *
5690  * @rdev: radeon_device pointer
5691  *
5692  * Inits cik specific vm parameters (number of VMs, base of vram for
5693  * VMIDs 1-15) (CIK).
5694  * Returns 0 for success.
5695  */
5696 int cik_vm_init(struct radeon_device *rdev)
5697 {
5698         /*
5699          * number of VMs
5700          * VMID 0 is reserved for System
5701          * radeon graphics/compute will use VMIDs 1-7
5702          * amdkfd will use VMIDs 8-15
5703          */
5704         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5705         /* base offset of vram pages */
5706         if (rdev->flags & RADEON_IS_IGP) {
5707                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5708                 tmp <<= 22;
5709                 rdev->vm_manager.vram_base_offset = tmp;
5710         } else
5711                 rdev->vm_manager.vram_base_offset = 0;
5712
5713         return 0;
5714 }
5715
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do here; kept so the asic function table has a valid hook.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down */
}
5726
5727 /**
5728  * cik_vm_decode_fault - print human readable fault info
5729  *
5730  * @rdev: radeon_device pointer
5731  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5732  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5733  *
5734  * Print human readable fault information (CIK).
5735  */
5736 static void cik_vm_decode_fault(struct radeon_device *rdev,
5737                                 u32 status, u32 addr, u32 mc_client)
5738 {
5739         u32 mc_id;
5740         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5741         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5742         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5743                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5744
5745         if (rdev->family == CHIP_HAWAII)
5746                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5747         else
5748                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5749
5750         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5751                protections, vmid, addr,
5752                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5753                block, mc_client, mc_id);
5754 }
5755
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring to emit the flush packets on
 * @vm_id: VMID (0-15) whose page table base is being updated
 * @pd_addr: physical address of the new page directory
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine to select */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base; contexts 0-7 and 8-15 live
	 * in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5837
5838 /*
5839  * RLC
5840  * The RLC is a multi-purpose microengine that handles a
5841  * variety of functions, the most important of which is
5842  * the interrupt controller.
5843  */
5844 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5845                                           bool enable)
5846 {
5847         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5848
5849         if (enable)
5850                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5851         else
5852                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5853         WREG32(CP_INT_CNTL_RING0, tmp);
5854 }
5855
5856 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5857 {
5858         u32 tmp;
5859
5860         tmp = RREG32(RLC_LB_CNTL);
5861         if (enable)
5862                 tmp |= LOAD_BALANCE_ENABLE;
5863         else
5864                 tmp &= ~LOAD_BALANCE_ENABLE;
5865         WREG32(RLC_LB_CNTL, tmp);
5866 }
5867
5868 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5869 {
5870         u32 i, j, k;
5871         u32 mask;
5872
5873         mutex_lock(&rdev->grbm_idx_mutex);
5874         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5875                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5876                         cik_select_se_sh(rdev, i, j);
5877                         for (k = 0; k < rdev->usec_timeout; k++) {
5878                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5879                                         break;
5880                                 udelay(1);
5881                         }
5882                 }
5883         }
5884         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5885         mutex_unlock(&rdev->grbm_idx_mutex);
5886
5887         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5888         for (k = 0; k < rdev->usec_timeout; k++) {
5889                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5890                         break;
5891                 udelay(1);
5892         }
5893 }
5894
5895 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5896 {
5897         u32 tmp;
5898
5899         tmp = RREG32(RLC_CNTL);
5900         if (tmp != rlc)
5901                 WREG32(RLC_CNTL, rlc);
5902 }
5903
5904 static u32 cik_halt_rlc(struct radeon_device *rdev)
5905 {
5906         u32 data, orig;
5907
5908         orig = data = RREG32(RLC_CNTL);
5909
5910         if (data & RLC_ENABLE) {
5911                 u32 i;
5912
5913                 data &= ~RLC_ENABLE;
5914                 WREG32(RLC_CNTL, data);
5915
5916                 for (i = 0; i < rdev->usec_timeout; i++) {
5917                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5918                                 break;
5919                         udelay(1);
5920                 }
5921
5922                 cik_wait_for_rlc_serdes(rdev);
5923         }
5924
5925         return orig;
5926 }
5927
5928 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5929 {
5930         u32 tmp, i, mask;
5931
5932         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5933         WREG32(RLC_GPR_REG2, tmp);
5934
5935         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5936         for (i = 0; i < rdev->usec_timeout; i++) {
5937                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5938                         break;
5939                 udelay(1);
5940         }
5941
5942         for (i = 0; i < rdev->usec_timeout; i++) {
5943                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5944                         break;
5945                 udelay(1);
5946         }
5947 }
5948
5949 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5950 {
5951         u32 tmp;
5952
5953         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5954         WREG32(RLC_GPR_REG2, tmp);
5955 }
5956
5957 /**
5958  * cik_rlc_stop - stop the RLC ME
5959  *
5960  * @rdev: radeon_device pointer
5961  *
5962  * Halt the RLC ME (MicroEngine) (CIK).
5963  */
5964 static void cik_rlc_stop(struct radeon_device *rdev)
5965 {
5966         WREG32(RLC_CNTL, 0);
5967
5968         cik_enable_gui_idle_interrupt(rdev, false);
5969
5970         cik_wait_for_rlc_serdes(rdev);
5971 }
5972
5973 /**
5974  * cik_rlc_start - start the RLC ME
5975  *
5976  * @rdev: radeon_device pointer
5977  *
5978  * Unhalt the RLC ME (MicroEngine) (CIK).
5979  */
5980 static void cik_rlc_start(struct radeon_device *rdev)
5981 {
5982         WREG32(RLC_CNTL, RLC_ENABLE);
5983
5984         cik_enable_gui_idle_interrupt(rdev, true);
5985
5986         udelay(50);
5987 }
5988
5989 /**
5990  * cik_rlc_resume - setup the RLC hw
5991  *
5992  * @rdev: radeon_device pointer
5993  *
5994  * Initialize the RLC registers, load the ucode,
5995  * and start the RLC (CIK).
5996  * Returns 0 for success, -EINVAL if the ucode is not available.
5997  */
5998 static int cik_rlc_resume(struct radeon_device *rdev)
5999 {
6000         u32 i, size, tmp;
6001
6002         if (!rdev->rlc_fw)
6003                 return -EINVAL;
6004
6005         cik_rlc_stop(rdev);
6006
6007         /* disable CG */
6008         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6009         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6010
6011         si_rlc_reset(rdev);
6012
6013         cik_init_pg(rdev);
6014
6015         cik_init_cg(rdev);
6016
6017         WREG32(RLC_LB_CNTR_INIT, 0);
6018         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6019
6020         mutex_lock(&rdev->grbm_idx_mutex);
6021         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6022         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6023         WREG32(RLC_LB_PARAMS, 0x00600408);
6024         WREG32(RLC_LB_CNTL, 0x80000004);
6025         mutex_unlock(&rdev->grbm_idx_mutex);
6026
6027         WREG32(RLC_MC_CNTL, 0);
6028         WREG32(RLC_UCODE_CNTL, 0);
6029
6030         if (rdev->new_fw) {
6031                 const struct rlc_firmware_header_v1_0 *hdr =
6032                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6033                 const __le32 *fw_data = (const __le32 *)
6034                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6035
6036                 radeon_ucode_print_rlc_hdr(&hdr->header);
6037
6038                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6039                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6040                 for (i = 0; i < size; i++)
6041                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6042                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6043         } else {
6044                 const __be32 *fw_data;
6045
6046                 switch (rdev->family) {
6047                 case CHIP_BONAIRE:
6048                 case CHIP_HAWAII:
6049                 default:
6050                         size = BONAIRE_RLC_UCODE_SIZE;
6051                         break;
6052                 case CHIP_KAVERI:
6053                         size = KV_RLC_UCODE_SIZE;
6054                         break;
6055                 case CHIP_KABINI:
6056                         size = KB_RLC_UCODE_SIZE;
6057                         break;
6058                 case CHIP_MULLINS:
6059                         size = ML_RLC_UCODE_SIZE;
6060                         break;
6061                 }
6062
6063                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6064                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6065                 for (i = 0; i < size; i++)
6066                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6067                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6068         }
6069
6070         /* XXX - find out what chips support lbpw */
6071         cik_enable_lbpw(rdev, false);
6072
6073         if (rdev->family == CHIP_BONAIRE)
6074                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6075
6076         cik_rlc_start(rdev);
6077
6078         return 0;
6079 }
6080
/* Enable/disable gfx coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) via RLC_CGCG_CGLS_CTRL.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                /* RLC must be halted while programming the serdes */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): read results discarded - the four
                 * back-to-back reads look like a deliberate delay/flush
                 * before disabling CG; confirm against hw docs */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6118
/* Enable/disable gfx medium grain clock gating (MGCG), plus the related
 * CP/RLC memory light sleep and CGTS settings when their cg_flags are set.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* RLC must be halted while programming the serdes */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* force RLC memory out of light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* force CP memory out of light sleep */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6202
/* MC/VM registers whose clock-gating and light-sleep enable bits are
 * toggled together by cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6215
6216 static void cik_enable_mc_ls(struct radeon_device *rdev,
6217                              bool enable)
6218 {
6219         int i;
6220         u32 orig, data;
6221
6222         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6223                 orig = data = RREG32(mc_cg_registers[i]);
6224                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6225                         data |= MC_LS_ENABLE;
6226                 else
6227                         data &= ~MC_LS_ENABLE;
6228                 if (data != orig)
6229                         WREG32(mc_cg_registers[i], data);
6230         }
6231 }
6232
6233 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6234                                bool enable)
6235 {
6236         int i;
6237         u32 orig, data;
6238
6239         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6240                 orig = data = RREG32(mc_cg_registers[i]);
6241                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6242                         data |= MC_CG_ENABLE;
6243                 else
6244                         data &= ~MC_CG_ENABLE;
6245                 if (data != orig)
6246                         WREG32(mc_cg_registers[i], data);
6247         }
6248 }
6249
6250 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6251                                  bool enable)
6252 {
6253         u32 orig, data;
6254
6255         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6256                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6257                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6258         } else {
6259                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6260                 data |= 0xff000000;
6261                 if (data != orig)
6262                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6263
6264                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6265                 data |= 0xff000000;
6266                 if (data != orig)
6267                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6268         }
6269 }
6270
6271 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6272                                  bool enable)
6273 {
6274         u32 orig, data;
6275
6276         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6277                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6278                 data |= 0x100;
6279                 if (orig != data)
6280                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6281
6282                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6283                 data |= 0x100;
6284                 if (orig != data)
6285                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6286         } else {
6287                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6288                 data &= ~0x100;
6289                 if (orig != data)
6290                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6291
6292                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6293                 data &= ~0x100;
6294                 if (orig != data)
6295                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6296         }
6297 }
6298
/* Enable/disable UVD medium grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                /* NOTE(review): the value read above is discarded - data is
                 * unconditionally forced to 0xfff. Presumably the read only
                 * matters for its side effect (if any); confirm */
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* clear the low 12 bits and disable dynamic clock mode */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6324
6325 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6326                                bool enable)
6327 {
6328         u32 orig, data;
6329
6330         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6331
6332         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6333                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6334                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6335         else
6336                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6337                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6338
6339         if (orig != data)
6340                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6341 }
6342
6343 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6344                                 bool enable)
6345 {
6346         u32 orig, data;
6347
6348         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6349
6350         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6351                 data &= ~CLOCK_GATING_DIS;
6352         else
6353                 data |= CLOCK_GATING_DIS;
6354
6355         if (orig != data)
6356                 WREG32(HDP_HOST_PATH_CNTL, data);
6357 }
6358
6359 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6360                               bool enable)
6361 {
6362         u32 orig, data;
6363
6364         orig = data = RREG32(HDP_MEM_POWER_LS);
6365
6366         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6367                 data |= HDP_LS_ENABLE;
6368         else
6369                 data &= ~HDP_LS_ENABLE;
6370
6371         if (orig != data)
6372                 WREG32(HDP_MEM_POWER_LS, data);
6373 }
6374
6375 void cik_update_cg(struct radeon_device *rdev,
6376                    u32 block, bool enable)
6377 {
6378
6379         if (block & RADEON_CG_BLOCK_GFX) {
6380                 cik_enable_gui_idle_interrupt(rdev, false);
6381                 /* order matters! */
6382                 if (enable) {
6383                         cik_enable_mgcg(rdev, true);
6384                         cik_enable_cgcg(rdev, true);
6385                 } else {
6386                         cik_enable_cgcg(rdev, false);
6387                         cik_enable_mgcg(rdev, false);
6388                 }
6389                 cik_enable_gui_idle_interrupt(rdev, true);
6390         }
6391
6392         if (block & RADEON_CG_BLOCK_MC) {
6393                 if (!(rdev->flags & RADEON_IS_IGP)) {
6394                         cik_enable_mc_mgcg(rdev, enable);
6395                         cik_enable_mc_ls(rdev, enable);
6396                 }
6397         }
6398
6399         if (block & RADEON_CG_BLOCK_SDMA) {
6400                 cik_enable_sdma_mgcg(rdev, enable);
6401                 cik_enable_sdma_mgls(rdev, enable);
6402         }
6403
6404         if (block & RADEON_CG_BLOCK_BIF) {
6405                 cik_enable_bif_mgls(rdev, enable);
6406         }
6407
6408         if (block & RADEON_CG_BLOCK_UVD) {
6409                 if (rdev->has_uvd)
6410                         cik_enable_uvd_mgcg(rdev, enable);
6411         }
6412
6413         if (block & RADEON_CG_BLOCK_HDP) {
6414                 cik_enable_hdp_mgcg(rdev, enable);
6415                 cik_enable_hdp_ls(rdev, enable);
6416         }
6417
6418         if (block & RADEON_CG_BLOCK_VCE) {
6419                 vce_v2_0_enable_mgcg(rdev, enable);
6420         }
6421 }
6422
6423 static void cik_init_cg(struct radeon_device *rdev)
6424 {
6425
6426         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6427
6428         if (rdev->has_uvd)
6429                 si_init_uvd_internal_cg(rdev);
6430
6431         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6432                              RADEON_CG_BLOCK_SDMA |
6433                              RADEON_CG_BLOCK_BIF |
6434                              RADEON_CG_BLOCK_UVD |
6435                              RADEON_CG_BLOCK_HDP), true);
6436 }
6437
6438 static void cik_fini_cg(struct radeon_device *rdev)
6439 {
6440         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6441                              RADEON_CG_BLOCK_SDMA |
6442                              RADEON_CG_BLOCK_BIF |
6443                              RADEON_CG_BLOCK_UVD |
6444                              RADEON_CG_BLOCK_HDP), false);
6445
6446         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6447 }
6448
6449 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6450                                           bool enable)
6451 {
6452         u32 data, orig;
6453
6454         orig = data = RREG32(RLC_PG_CNTL);
6455         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6457         else
6458                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6459         if (orig != data)
6460                 WREG32(RLC_PG_CNTL, data);
6461 }
6462
6463 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6464                                           bool enable)
6465 {
6466         u32 data, orig;
6467
6468         orig = data = RREG32(RLC_PG_CNTL);
6469         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6470                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6471         else
6472                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6473         if (orig != data)
6474                 WREG32(RLC_PG_CNTL, data);
6475 }
6476
6477 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6478 {
6479         u32 data, orig;
6480
6481         orig = data = RREG32(RLC_PG_CNTL);
6482         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6483                 data &= ~DISABLE_CP_PG;
6484         else
6485                 data |= DISABLE_CP_PG;
6486         if (orig != data)
6487                 WREG32(RLC_PG_CNTL, data);
6488 }
6489
6490 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6491 {
6492         u32 data, orig;
6493
6494         orig = data = RREG32(RLC_PG_CNTL);
6495         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6496                 data &= ~DISABLE_GDS_PG;
6497         else
6498                 data |= DISABLE_GDS_PG;
6499         if (orig != data)
6500                 WREG32(RLC_PG_CNTL, data);
6501 }
6502
/* jump table layout inside the legacy (non-header) CP firmware images */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* cik_init_cp_pg_table - copy the CP jump tables into the RLC cp_table BO
 *
 * Pulls the jump table out of each CP microengine firmware image
 * (me 0..3 = CE, PFP, ME, MEC; me 4 = MEC2, KAVERI only) and packs
 * them back-to-back into rdev->rlc.cp_table_ptr.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        /* KAVERI has a second MEC, so one extra table to copy */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        /* new-style firmware: offset/size come from the header */
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: fixed offsets, big-endian words */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6590
/* Enable/disable gfx power gating and the RLC auto power-gate control. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): result of this read is discarded -
                 * presumably a posting read to flush the disables; confirm */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6620
6621 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6622 {
6623         u32 mask = 0, tmp, tmp1;
6624         int i;
6625
6626         mutex_lock(&rdev->grbm_idx_mutex);
6627         cik_select_se_sh(rdev, se, sh);
6628         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6629         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6630         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6631         mutex_unlock(&rdev->grbm_idx_mutex);
6632
6633         tmp &= 0xffff0000;
6634
6635         tmp |= tmp1;
6636         tmp >>= 16;
6637
6638         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6639                 mask <<= 1;
6640                 mask |= 1;
6641         }
6642
6643         return (~tmp) & mask;
6644 }
6645
6646 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6647 {
6648         u32 i, j, k, active_cu_number = 0;
6649         u32 mask, counter, cu_bitmap;
6650         u32 tmp = 0;
6651
6652         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6653                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6654                         mask = 1;
6655                         cu_bitmap = 0;
6656                         counter = 0;
6657                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6658                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6659                                         if (counter < 2)
6660                                                 cu_bitmap |= mask;
6661                                         counter ++;
6662                                 }
6663                                 mask <<= 1;
6664                         }
6665
6666                         active_cu_number += counter;
6667                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6668                 }
6669         }
6670
6671         WREG32(RLC_PG_AO_CU_MASK, tmp);
6672
6673         tmp = RREG32(RLC_MAX_PG_CU);
6674         tmp &= ~MAX_PU_CU_MASK;
6675         tmp |= MAX_PU_CU(active_cu_number);
6676         WREG32(RLC_MAX_PG_CU, tmp);
6677 }
6678
6679 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6680                                        bool enable)
6681 {
6682         u32 data, orig;
6683
6684         orig = data = RREG32(RLC_PG_CNTL);
6685         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6686                 data |= STATIC_PER_CU_PG_ENABLE;
6687         else
6688                 data &= ~STATIC_PER_CU_PG_ENABLE;
6689         if (orig != data)
6690                 WREG32(RLC_PG_CNTL, data);
6691 }
6692
6693 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6694                                         bool enable)
6695 {
6696         u32 data, orig;
6697
6698         orig = data = RREG32(RLC_PG_CNTL);
6699         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6700                 data |= DYN_PER_CU_PG_ENABLE;
6701         else
6702                 data &= ~DYN_PER_CU_PG_ENABLE;
6703         if (orig != data)
6704                 WREG32(RLC_PG_CNTL, data);
6705 }
6706
6707 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6708 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6709
/**
 * cik_init_gfx_cgpg - initialize RLC state for gfx power gating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the RLC scratch area with the clear-state descriptor
 * (GPU address and size, or zeros when no cs_data is present) and the
 * save/restore register list, points the RLC at the save/restore and
 * CP table buffers, and sets up the idle-poll and power-gating delay
 * parameters used when GFX PG is active.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list into RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* hardware-tuned PG delay values below (0x10101010, 0x3, 0x700);
	 * their semantics are not visible here -- treat as calibrated
	 * constants and confirm against the register spec before changing.
	 */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6758
/**
 * cik_update_gfx_pg - enable/disable all gfx power-gating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Applies @enable to coarse-grain, static per-CU, and dynamic per-CU
 * gfx power gating in that order; each helper additionally checks the
 * corresponding rdev->pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6765
6766 u32 cik_get_csb_size(struct radeon_device *rdev)
6767 {
6768         u32 count = 0;
6769         const struct cs_section_def *sect = NULL;
6770         const struct cs_extent_def *ext = NULL;
6771
6772         if (rdev->rlc.cs_data == NULL)
6773                 return 0;
6774
6775         /* begin clear state */
6776         count += 2;
6777         /* context control state */
6778         count += 3;
6779
6780         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6781                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6782                         if (sect->id == SECT_CONTEXT)
6783                                 count += 2 + ext->reg_count;
6784                         else
6785                                 return 0;
6786                 }
6787         }
6788         /* pa_sc_raster_config/pa_sc_raster_config1 */
6789         count += 4;
6790         /* end clear state */
6791         count += 2;
6792         /* clear state */
6793         count += 2;
6794
6795         return count;
6796 }
6797
6798 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6799 {
6800         u32 count = 0, i;
6801         const struct cs_section_def *sect = NULL;
6802         const struct cs_extent_def *ext = NULL;
6803
6804         if (rdev->rlc.cs_data == NULL)
6805                 return;
6806         if (buffer == NULL)
6807                 return;
6808
6809         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6810         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6811
6812         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6813         buffer[count++] = cpu_to_le32(0x80000000);
6814         buffer[count++] = cpu_to_le32(0x80000000);
6815
6816         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6817                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6818                         if (sect->id == SECT_CONTEXT) {
6819                                 buffer[count++] =
6820                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6821                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6822                                 for (i = 0; i < ext->reg_count; i++)
6823                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6824                         } else {
6825                                 return;
6826                         }
6827                 }
6828         }
6829
6830         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6831         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6832         switch (rdev->family) {
6833         case CHIP_BONAIRE:
6834                 buffer[count++] = cpu_to_le32(0x16000012);
6835                 buffer[count++] = cpu_to_le32(0x00000000);
6836                 break;
6837         case CHIP_KAVERI:
6838                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6839                 buffer[count++] = cpu_to_le32(0x00000000);
6840                 break;
6841         case CHIP_KABINI:
6842         case CHIP_MULLINS:
6843                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6844                 buffer[count++] = cpu_to_le32(0x00000000);
6845                 break;
6846         case CHIP_HAWAII:
6847                 buffer[count++] = cpu_to_le32(0x3a00161a);
6848                 buffer[count++] = cpu_to_le32(0x0000002e);
6849                 break;
6850         default:
6851                 buffer[count++] = cpu_to_le32(0x00000000);
6852                 buffer[count++] = cpu_to_le32(0x00000000);
6853                 break;
6854         }
6855
6856         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6857         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6858
6859         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6860         buffer[count++] = cpu_to_le32(0);
6861 }
6862
6863 static void cik_init_pg(struct radeon_device *rdev)
6864 {
6865         if (rdev->pg_flags) {
6866                 cik_enable_sck_slowdown_on_pu(rdev, true);
6867                 cik_enable_sck_slowdown_on_pd(rdev, true);
6868                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6869                         cik_init_gfx_cgpg(rdev);
6870                         cik_enable_cp_pg(rdev, true);
6871                         cik_enable_gds_pg(rdev, true);
6872                 }
6873                 cik_init_ao_cu_mask(rdev);
6874                 cik_update_gfx_pg(rdev, true);
6875         }
6876 }
6877
6878 static void cik_fini_pg(struct radeon_device *rdev)
6879 {
6880         if (rdev->pg_flags) {
6881                 cik_update_gfx_pg(rdev, false);
6882                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6883                         cik_enable_cp_pg(rdev, false);
6884                         cik_enable_gds_pg(rdev, false);
6885                 }
6886         }
6887 }
6888
6889 /*
6890  * Interrupts
6891  * Starting with r6xx, interrupts are handled via a ring buffer.
6892  * Ring buffers are areas of GPU accessible memory that the GPU
6893  * writes interrupt vectors into and the host reads vectors out of.
6894  * There is a rptr (read pointer) that determines where the
6895  * host is currently reading, and a wptr (write pointer)
6896  * which determines where the GPU has written.  When the
6897  * pointers are equal, the ring is idle.  When the GPU
6898  * writes vectors to the ring buffer, it increments the
6899  * wptr.  When there is an interrupt, the host then starts
6900  * fetching commands and processing them until the pointers are
6901  * equal again at which point it updates the rptr.
6902  */
6903
6904 /**
6905  * cik_enable_interrupts - Enable the interrupt ring buffer
6906  *
6907  * @rdev: radeon_device pointer
6908  *
6909  * Enable the interrupt ring buffer (CIK).
6910  */
6911 static void cik_enable_interrupts(struct radeon_device *rdev)
6912 {
6913         u32 ih_cntl = RREG32(IH_CNTL);
6914         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6915
6916         ih_cntl |= ENABLE_INTR;
6917         ih_rb_cntl |= IH_RB_ENABLE;
6918         WREG32(IH_CNTL, ih_cntl);
6919         WREG32(IH_RB_CNTL, ih_rb_cntl);
6920         rdev->ih.enabled = true;
6921 }
6922
6923 /**
6924  * cik_disable_interrupts - Disable the interrupt ring buffer
6925  *
6926  * @rdev: radeon_device pointer
6927  *
6928  * Disable the interrupt ring buffer (CIK).
6929  */
6930 static void cik_disable_interrupts(struct radeon_device *rdev)
6931 {
6932         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933         u32 ih_cntl = RREG32(IH_CNTL);
6934
6935         ih_rb_cntl &= ~IH_RB_ENABLE;
6936         ih_cntl &= ~ENABLE_INTR;
6937         WREG32(IH_RB_CNTL, ih_rb_cntl);
6938         WREG32(IH_CNTL, ih_cntl);
6939         /* set rptr, wptr to 0 */
6940         WREG32(IH_RB_RPTR, 0);
6941         WREG32(IH_RB_WPTR, 0);
6942         rdev->ih.enabled = false;
6943         rdev->ih.rptr = 0;
6944 }
6945
6946 /**
6947  * cik_disable_interrupt_state - Disable all interrupt sources
6948  *
6949  * @rdev: radeon_device pointer
6950  *
6951  * Clear all interrupt enable bits used by the driver (CIK).
6952  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes of both MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. -- guarded by how many crtcs the asic has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit in each HPD control */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7022
7023 /**
7024  * cik_irq_init - init and enable the interrupt ring
7025  *
7026  * @rdev: radeon_device pointer
7027  *
7028  * Allocate a ring buffer for the interrupt controller,
7029  * enable the RLC, disable interrupts, enable the IH
7030  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7032  * Returns 0 for success, errors for failure.
7033  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc; on failure undo the ring allocation above */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7104
7105 /**
7106  * cik_irq_set - enable/disable interrupt sources
7107  *
7108  * @rdev: radeon_device pointer
7109  *
7110  * Enable interrupt sources on the GPU (vblanks, hpd,
7111  * etc.) (CIK).
7112  * Returns 0 for success, errors for failure.
7113  */
7114 int cik_irq_set(struct radeon_device *rdev)
7115 {
7116         u32 cp_int_cntl;
7117         u32 cp_m1p0;
7118         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7119         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7120         u32 grbm_int_cntl = 0;
7121         u32 dma_cntl, dma_cntl1;
7122
7123         if (!rdev->irq.installed) {
7124                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7125                 return -EINVAL;
7126         }
7127         /* don't enable anything if the ih is disabled */
7128         if (!rdev->ih.enabled) {
7129                 cik_disable_interrupts(rdev);
7130                 /* force the active interrupt state to all disabled */
7131                 cik_disable_interrupt_state(rdev);
7132                 return 0;
7133         }
7134
7135         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7136                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7137         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7138
7139         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7140         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7141         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7142         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7143         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7144         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7145
7146         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7147         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7148
7149         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7150
7151         /* enable CP interrupts on all rings */
7152         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7153                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7154                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7155         }
7156         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7157                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7158                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7159                 if (ring->me == 1) {
7160                         switch (ring->pipe) {
7161                         case 0:
7162                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163                                 break;
7164                         default:
7165                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7166                                 break;
7167                         }
7168                 } else {
7169                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7170                 }
7171         }
7172         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7173                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7174                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7175                 if (ring->me == 1) {
7176                         switch (ring->pipe) {
7177                         case 0:
7178                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7179                                 break;
7180                         default:
7181                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7182                                 break;
7183                         }
7184                 } else {
7185                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7186                 }
7187         }
7188
7189         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7190                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7191                 dma_cntl |= TRAP_ENABLE;
7192         }
7193
7194         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7195                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7196                 dma_cntl1 |= TRAP_ENABLE;
7197         }
7198
7199         if (rdev->irq.crtc_vblank_int[0] ||
7200             atomic_read(&rdev->irq.pflip[0])) {
7201                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7202                 crtc1 |= VBLANK_INTERRUPT_MASK;
7203         }
7204         if (rdev->irq.crtc_vblank_int[1] ||
7205             atomic_read(&rdev->irq.pflip[1])) {
7206                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7207                 crtc2 |= VBLANK_INTERRUPT_MASK;
7208         }
7209         if (rdev->irq.crtc_vblank_int[2] ||
7210             atomic_read(&rdev->irq.pflip[2])) {
7211                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7212                 crtc3 |= VBLANK_INTERRUPT_MASK;
7213         }
7214         if (rdev->irq.crtc_vblank_int[3] ||
7215             atomic_read(&rdev->irq.pflip[3])) {
7216                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7217                 crtc4 |= VBLANK_INTERRUPT_MASK;
7218         }
7219         if (rdev->irq.crtc_vblank_int[4] ||
7220             atomic_read(&rdev->irq.pflip[4])) {
7221                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7222                 crtc5 |= VBLANK_INTERRUPT_MASK;
7223         }
7224         if (rdev->irq.crtc_vblank_int[5] ||
7225             atomic_read(&rdev->irq.pflip[5])) {
7226                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7227                 crtc6 |= VBLANK_INTERRUPT_MASK;
7228         }
7229         if (rdev->irq.hpd[0]) {
7230                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7231                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232         }
7233         if (rdev->irq.hpd[1]) {
7234                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7235                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236         }
7237         if (rdev->irq.hpd[2]) {
7238                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7239                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7240         }
7241         if (rdev->irq.hpd[3]) {
7242                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7243                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244         }
7245         if (rdev->irq.hpd[4]) {
7246                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7247                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248         }
7249         if (rdev->irq.hpd[5]) {
7250                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7251                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252         }
7253
7254         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7255
7256         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7257         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7258
7259         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7260
7261         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7262
7263         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7264         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7265         if (rdev->num_crtc >= 4) {
7266                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7267                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7268         }
7269         if (rdev->num_crtc >= 6) {
7270                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7271                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7272         }
7273
7274         if (rdev->num_crtc >= 2) {
7275                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7276                        GRPH_PFLIP_INT_MASK);
7277                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7278                        GRPH_PFLIP_INT_MASK);
7279         }
7280         if (rdev->num_crtc >= 4) {
7281                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7282                        GRPH_PFLIP_INT_MASK);
7283                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7284                        GRPH_PFLIP_INT_MASK);
7285         }
7286         if (rdev->num_crtc >= 6) {
7287                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7288                        GRPH_PFLIP_INT_MASK);
7289                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7290                        GRPH_PFLIP_INT_MASK);
7291         }
7292
7293         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7294         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7295         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7296         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7297         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7298         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7299
7300         /* posting read */
7301         RREG32(SRBM_STATUS);
7302
7303         return 0;
7304 }
7305
7306 /**
7307  * cik_irq_ack - ack interrupt sources
7308  *
7309  * @rdev: radeon_device pointer
7310  *
7311  * Ack interrupt sources on the GPU (vblanks, hpd,
7312  * etc.) (CIK).  Certain interrupts sources are sw
7313  * generated and do not require an explicit ack.
7314  */
7315 static inline void cik_irq_ack(struct radeon_device *rdev)
7316 {
7317         u32 tmp;
7318
7319         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7320         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7321         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7322         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7323         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7324         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7325         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7326
7327         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7328                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7329         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7330                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7331         if (rdev->num_crtc >= 4) {
7332                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7333                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7334                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7335                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7336         }
7337         if (rdev->num_crtc >= 6) {
7338                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7339                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7340                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7341                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7342         }
7343
7344         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7345                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7346                        GRPH_PFLIP_INT_CLEAR);
7347         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7348                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7349                        GRPH_PFLIP_INT_CLEAR);
7350         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7351                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7352         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7353                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7354         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7355                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7356         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7357                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7358
7359         if (rdev->num_crtc >= 4) {
7360                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7361                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7362                                GRPH_PFLIP_INT_CLEAR);
7363                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7364                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7365                                GRPH_PFLIP_INT_CLEAR);
7366                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7367                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7368                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7369                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7370                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7371                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7372                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7373                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7374         }
7375
7376         if (rdev->num_crtc >= 6) {
7377                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7378                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7379                                GRPH_PFLIP_INT_CLEAR);
7380                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7381                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7382                                GRPH_PFLIP_INT_CLEAR);
7383                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7384                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7385                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7386                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7387                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7388                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7389                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7390                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7391         }
7392
7393         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7394                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7395                 tmp |= DC_HPDx_INT_ACK;
7396                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7397         }
7398         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7399                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7400                 tmp |= DC_HPDx_INT_ACK;
7401                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7402         }
7403         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7404                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7405                 tmp |= DC_HPDx_INT_ACK;
7406                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7407         }
7408         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7409                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7410                 tmp |= DC_HPDx_INT_ACK;
7411                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7412         }
7413         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7414                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7415                 tmp |= DC_HPDx_INT_ACK;
7416                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7417         }
7418         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7419                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7420                 tmp |= DC_HPDx_INT_ACK;
7421                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7422         }
7423         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7424                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7425                 tmp |= DC_HPDx_RX_INT_ACK;
7426                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7427         }
7428         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7429                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7430                 tmp |= DC_HPDx_RX_INT_ACK;
7431                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7432         }
7433         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7434                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7435                 tmp |= DC_HPDx_RX_INT_ACK;
7436                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7437         }
7438         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7439                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7440                 tmp |= DC_HPDx_RX_INT_ACK;
7441                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7442         }
7443         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7444                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7445                 tmp |= DC_HPDx_RX_INT_ACK;
7446                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7447         }
7448         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7449                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7450                 tmp |= DC_HPDx_RX_INT_ACK;
7451                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7452         }
7453 }
7454
7455 /**
7456  * cik_irq_disable - disable interrupts
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Disable interrupts on the hw (CIK).  Disables interrupt
 * generation, waits briefly so in-flight interrupts can land,
 * acks whatever is still pending and finally programs all
 * interrupt source registers to their disabled state.
7461  */
7462 static void cik_irq_disable(struct radeon_device *rdev)
7463 {
7464         cik_disable_interrupts(rdev);
7465         /* Wait and acknowledge irq */
7466         mdelay(1);
7467         cik_irq_ack(rdev);
             /* write the disabled state to every interrupt source register */
7468         cik_disable_interrupt_state(rdev);
7469 }
7470
7471 /**
7472  * cik_irq_suspend - disable interrupts for suspend
7473  *
7474  * @rdev: radeon_device pointer
7475  *
7476  * Disable interrupts and stop the RLC (CIK).
7477  * Used for suspend.
7478  */
7479 static void cik_irq_suspend(struct radeon_device *rdev)
7480 {
7481         cik_irq_disable(rdev);
             /* the RLC must be halted after irqs are quiesced */
7482         cik_rlc_stop(rdev);
7483 }
7484
7485 /**
7486  * cik_irq_fini - tear down interrupt support
7487  *
7488  * @rdev: radeon_device pointer
7489  *
7490  * Disable interrupts on the hw and free the IH ring
7491  * buffer (CIK).
7492  * Used for driver unload.
7493  */
7494 static void cik_irq_fini(struct radeon_device *rdev)
7495 {
             /* quiesce the hw (disable irqs, stop the RLC) before freeing */
7496         cik_irq_suspend(rdev);
             /* release the IH ring buffer allocated at init time */
7497         r600_ih_ring_fini(rdev);
7498 }
7499
7500 /**
7501  * cik_get_ih_wptr - get the IH ring buffer wptr
7502  *
7503  * @rdev: radeon_device pointer
7504  *
7505  * Get the IH ring buffer wptr from either the register
7506  * or the writeback memory buffer (CIK).  Also check for
7507  * ring buffer overflow and deal with it.
7508  * Used by cik_irq_process().
7509  * Returns the value of the wptr.
7510  */
7511 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7512 {
7513         u32 wptr, tmp;
7514
7515         if (rdev->wb.enabled)
7516                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7517         else
7518                 wptr = RREG32(IH_RB_WPTR);
7519
7520         if (wptr & RB_OVERFLOW) {
7521                 wptr &= ~RB_OVERFLOW;
7522                 /* When a ring buffer overflow happen start parsing interrupt
7523                  * from the last not overwritten vector (wptr + 16). Hopefully
7524                  * this should allow us to catchup.
7525                  */
7526                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7527                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7528                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7529                 tmp = RREG32(IH_RB_CNTL);
7530                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7531                 WREG32(IH_RB_CNTL, tmp);
7532         }
7533         return (wptr & rdev->ih.ptr_mask);
7534 }
7535
7536 /*        CIK IV Ring
7537  * Each IV ring entry is 128 bits:
7538  * [7:0]    - interrupt source id
7539  * [31:8]   - reserved
7540  * [59:32]  - interrupt source data
7541  * [63:60]  - reserved
7542  * [71:64]  - RINGID
7543  *            CP:
7544  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7545  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7546  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7547  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7548  *            PIPE_ID - ME0 0=3D
7549  *                    - ME1&2 compute dispatcher (4 pipes each)
7550  *            SDMA:
7551  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7552  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7553  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7554  * [79:72]  - VMID
7555  * [95:80]  - PASID
7556  * [127:96] - reserved
7557  */
7558 /**
7559  * cik_irq_process - interrupt handler
7560  *
7561  * @rdev: radeon_device pointer
7562  *
7563  * Interrupt handler (CIK).  Walk the IH ring,
7564  * ack interrupts and schedule work to handle
7565  * interrupt events.
7566  * Returns irq process return code.
7567  */
7568 int cik_irq_process(struct radeon_device *rdev)
7569 {
7570         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7571         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7572         u32 wptr;
7573         u32 rptr;
7574         u32 src_id, src_data, ring_id;
7575         u8 me_id, pipe_id, queue_id;
7576         u32 ring_index;
7577         bool queue_hotplug = false;
7578         bool queue_dp = false;
7579         bool queue_reset = false;
7580         u32 addr, status, mc_client;
7581         bool queue_thermal = false;
7582
7583         if (!rdev->ih.enabled || rdev->shutdown)
7584                 return IRQ_NONE;
7585
7586         wptr = cik_get_ih_wptr(rdev);
7587
7588 restart_ih:
7589         /* is somebody else already processing irqs? */
7590         if (atomic_xchg(&rdev->ih.lock, 1))
7591                 return IRQ_NONE;
7592
7593         rptr = rdev->ih.rptr;
7594         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7595
7596         /* Order reading of wptr vs. reading of IH ring data */
7597         rmb();
7598
7599         /* display interrupts */
7600         cik_irq_ack(rdev);
7601
7602         while (rptr != wptr) {
7603                 /* wptr/rptr are in bytes! */
7604                 ring_index = rptr / 4;
7605
7606                 radeon_kfd_interrupt(rdev,
7607                                 (const void *) &rdev->ih.ring[ring_index]);
7608
7609                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7610                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7611                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7612
7613                 switch (src_id) {
7614                 case 1: /* D1 vblank/vline */
7615                         switch (src_data) {
7616                         case 0: /* D1 vblank */
7617                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7618                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619
7620                                 if (rdev->irq.crtc_vblank_int[0]) {
7621                                         drm_handle_vblank(rdev->ddev, 0);
7622                                         rdev->pm.vblank_sync = true;
7623                                         wake_up(&rdev->irq.vblank_queue);
7624                                 }
7625                                 if (atomic_read(&rdev->irq.pflip[0]))
7626                                         radeon_crtc_handle_vblank(rdev, 0);
7627                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7628                                 DRM_DEBUG("IH: D1 vblank\n");
7629
7630                                 break;
7631                         case 1: /* D1 vline */
7632                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7633                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634
7635                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7636                                 DRM_DEBUG("IH: D1 vline\n");
7637
7638                                 break;
7639                         default:
7640                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7641                                 break;
7642                         }
7643                         break;
7644                 case 2: /* D2 vblank/vline */
7645                         switch (src_data) {
7646                         case 0: /* D2 vblank */
7647                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7648                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649
7650                                 if (rdev->irq.crtc_vblank_int[1]) {
7651                                         drm_handle_vblank(rdev->ddev, 1);
7652                                         rdev->pm.vblank_sync = true;
7653                                         wake_up(&rdev->irq.vblank_queue);
7654                                 }
7655                                 if (atomic_read(&rdev->irq.pflip[1]))
7656                                         radeon_crtc_handle_vblank(rdev, 1);
7657                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7658                                 DRM_DEBUG("IH: D2 vblank\n");
7659
7660                                 break;
7661                         case 1: /* D2 vline */
7662                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7663                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664
7665                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7666                                 DRM_DEBUG("IH: D2 vline\n");
7667
7668                                 break;
7669                         default:
7670                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7671                                 break;
7672                         }
7673                         break;
7674                 case 3: /* D3 vblank/vline */
7675                         switch (src_data) {
7676                         case 0: /* D3 vblank */
7677                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7678                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679
7680                                 if (rdev->irq.crtc_vblank_int[2]) {
7681                                         drm_handle_vblank(rdev->ddev, 2);
7682                                         rdev->pm.vblank_sync = true;
7683                                         wake_up(&rdev->irq.vblank_queue);
7684                                 }
7685                                 if (atomic_read(&rdev->irq.pflip[2]))
7686                                         radeon_crtc_handle_vblank(rdev, 2);
7687                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7688                                 DRM_DEBUG("IH: D3 vblank\n");
7689
7690                                 break;
7691                         case 1: /* D3 vline */
7692                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7693                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694
7695                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7696                                 DRM_DEBUG("IH: D3 vline\n");
7697
7698                                 break;
7699                         default:
7700                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7701                                 break;
7702                         }
7703                         break;
7704                 case 4: /* D4 vblank/vline */
7705                         switch (src_data) {
7706                         case 0: /* D4 vblank */
7707                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7708                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709
7710                                 if (rdev->irq.crtc_vblank_int[3]) {
7711                                         drm_handle_vblank(rdev->ddev, 3);
7712                                         rdev->pm.vblank_sync = true;
7713                                         wake_up(&rdev->irq.vblank_queue);
7714                                 }
7715                                 if (atomic_read(&rdev->irq.pflip[3]))
7716                                         radeon_crtc_handle_vblank(rdev, 3);
7717                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7718                                 DRM_DEBUG("IH: D4 vblank\n");
7719
7720                                 break;
7721                         case 1: /* D4 vline */
7722                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7723                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724
7725                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7726                                 DRM_DEBUG("IH: D4 vline\n");
7727
7728                                 break;
7729                         default:
7730                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7731                                 break;
7732                         }
7733                         break;
7734                 case 5: /* D5 vblank/vline */
7735                         switch (src_data) {
7736                         case 0: /* D5 vblank */
7737                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7738                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739
7740                                 if (rdev->irq.crtc_vblank_int[4]) {
7741                                         drm_handle_vblank(rdev->ddev, 4);
7742                                         rdev->pm.vblank_sync = true;
7743                                         wake_up(&rdev->irq.vblank_queue);
7744                                 }
7745                                 if (atomic_read(&rdev->irq.pflip[4]))
7746                                         radeon_crtc_handle_vblank(rdev, 4);
7747                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7748                                 DRM_DEBUG("IH: D5 vblank\n");
7749
7750                                 break;
7751                         case 1: /* D5 vline */
7752                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7753                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754
7755                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7756                                 DRM_DEBUG("IH: D5 vline\n");
7757
7758                                 break;
7759                         default:
7760                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7761                                 break;
7762                         }
7763                         break;
7764                 case 6: /* D6 vblank/vline */
7765                         switch (src_data) {
7766                         case 0: /* D6 vblank */
7767                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7768                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769
7770                                 if (rdev->irq.crtc_vblank_int[5]) {
7771                                         drm_handle_vblank(rdev->ddev, 5);
7772                                         rdev->pm.vblank_sync = true;
7773                                         wake_up(&rdev->irq.vblank_queue);
7774                                 }
7775                                 if (atomic_read(&rdev->irq.pflip[5]))
7776                                         radeon_crtc_handle_vblank(rdev, 5);
7777                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7778                                 DRM_DEBUG("IH: D6 vblank\n");
7779
7780                                 break;
7781                         case 1: /* D6 vline */
7782                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7783                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784
7785                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7786                                 DRM_DEBUG("IH: D6 vline\n");
7787
7788                                 break;
7789                         default:
7790                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7791                                 break;
7792                         }
7793                         break;
7794                 case 8: /* D1 page flip */
7795                 case 10: /* D2 page flip */
7796                 case 12: /* D3 page flip */
7797                 case 14: /* D4 page flip */
7798                 case 16: /* D5 page flip */
7799                 case 18: /* D6 page flip */
7800                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7801                         if (radeon_use_pflipirq > 0)
7802                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7803                         break;
7804                 case 42: /* HPD hotplug */
7805                         switch (src_data) {
7806                         case 0:
7807                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7808                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7811                                 queue_hotplug = true;
7812                                 DRM_DEBUG("IH: HPD1\n");
7813
7814                                 break;
7815                         case 1:
7816                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7817                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7820                                 queue_hotplug = true;
7821                                 DRM_DEBUG("IH: HPD2\n");
7822
7823                                 break;
7824                         case 2:
7825                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7826                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7829                                 queue_hotplug = true;
7830                                 DRM_DEBUG("IH: HPD3\n");
7831
7832                                 break;
7833                         case 3:
7834                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7835                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7838                                 queue_hotplug = true;
7839                                 DRM_DEBUG("IH: HPD4\n");
7840
7841                                 break;
7842                         case 4:
7843                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7844                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7847                                 queue_hotplug = true;
7848                                 DRM_DEBUG("IH: HPD5\n");
7849
7850                                 break;
7851                         case 5:
7852                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7853                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7856                                 queue_hotplug = true;
7857                                 DRM_DEBUG("IH: HPD6\n");
7858
7859                                 break;
7860                         case 6:
7861                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7862                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7865                                 queue_dp = true;
7866                                 DRM_DEBUG("IH: HPD_RX 1\n");
7867
7868                                 break;
7869                         case 7:
7870                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7871                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7874                                 queue_dp = true;
7875                                 DRM_DEBUG("IH: HPD_RX 2\n");
7876
7877                                 break;
7878                         case 8:
7879                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7880                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7883                                 queue_dp = true;
7884                                 DRM_DEBUG("IH: HPD_RX 3\n");
7885
7886                                 break;
7887                         case 9:
7888                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7889                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7892                                 queue_dp = true;
7893                                 DRM_DEBUG("IH: HPD_RX 4\n");
7894
7895                                 break;
7896                         case 10:
7897                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7898                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7899
7900                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7901                                 queue_dp = true;
7902                                 DRM_DEBUG("IH: HPD_RX 5\n");
7903
7904                                 break;
7905                         case 11:
7906                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7907                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7908
7909                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7910                                 queue_dp = true;
7911                                 DRM_DEBUG("IH: HPD_RX 6\n");
7912
7913                                 break;
7914                         default:
7915                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916                                 break;
7917                         }
7918                         break;
7919                 case 96:
7920                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7921                         WREG32(SRBM_INT_ACK, 0x1);
7922                         break;
7923                 case 124: /* UVD */
7924                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7925                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7926                         break;
7927                 case 146:
7928                 case 147:
7929                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7930                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7931                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7932                         /* reset addr and status */
7933                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7934                         if (addr == 0x0 && status == 0x0)
7935                                 break;
7936                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7937                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7938                                 addr);
7939                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7940                                 status);
7941                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7942                         break;
7943                 case 167: /* VCE */
7944                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7945                         switch (src_data) {
7946                         case 0:
7947                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7948                                 break;
7949                         case 1:
7950                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7951                                 break;
7952                         default:
7953                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7954                                 break;
7955                         }
7956                         break;
7957                 case 176: /* GFX RB CP_INT */
7958                 case 177: /* GFX IB CP_INT */
7959                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7960                         break;
7961                 case 181: /* CP EOP event */
7962                         DRM_DEBUG("IH: CP EOP\n");
7963                         /* XXX check the bitfield order! */
7964                         me_id = (ring_id & 0x60) >> 5;
7965                         pipe_id = (ring_id & 0x18) >> 3;
7966                         queue_id = (ring_id & 0x7) >> 0;
7967                         switch (me_id) {
7968                         case 0:
7969                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7970                                 break;
7971                         case 1:
7972                         case 2:
7973                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7974                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7975                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7976                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7977                                 break;
7978                         }
7979                         break;
7980                 case 184: /* CP Privileged reg access */
7981                         DRM_ERROR("Illegal register access in command stream\n");
7982                         /* XXX check the bitfield order! */
7983                         me_id = (ring_id & 0x60) >> 5;
7984                         pipe_id = (ring_id & 0x18) >> 3;
7985                         queue_id = (ring_id & 0x7) >> 0;
7986                         switch (me_id) {
7987                         case 0:
7988                                 /* This results in a full GPU reset, but all we need to do is soft
7989                                  * reset the CP for gfx
7990                                  */
7991                                 queue_reset = true;
7992                                 break;
7993                         case 1:
7994                                 /* XXX compute */
7995                                 queue_reset = true;
7996                                 break;
7997                         case 2:
7998                                 /* XXX compute */
7999                                 queue_reset = true;
8000                                 break;
8001                         }
8002                         break;
8003                 case 185: /* CP Privileged inst */
8004                         DRM_ERROR("Illegal instruction in command stream\n");
8005                         /* XXX check the bitfield order! */
8006                         me_id = (ring_id & 0x60) >> 5;
8007                         pipe_id = (ring_id & 0x18) >> 3;
8008                         queue_id = (ring_id & 0x7) >> 0;
8009                         switch (me_id) {
8010                         case 0:
8011                                 /* This results in a full GPU reset, but all we need to do is soft
8012                                  * reset the CP for gfx
8013                                  */
8014                                 queue_reset = true;
8015                                 break;
8016                         case 1:
8017                                 /* XXX compute */
8018                                 queue_reset = true;
8019                                 break;
8020                         case 2:
8021                                 /* XXX compute */
8022                                 queue_reset = true;
8023                                 break;
8024                         }
8025                         break;
8026                 case 224: /* SDMA trap event */
8027                         /* XXX check the bitfield order! */
8028                         me_id = (ring_id & 0x3) >> 0;
8029                         queue_id = (ring_id & 0xc) >> 2;
8030                         DRM_DEBUG("IH: SDMA trap\n");
8031                         switch (me_id) {
8032                         case 0:
8033                                 switch (queue_id) {
8034                                 case 0:
8035                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8036                                         break;
8037                                 case 1:
8038                                         /* XXX compute */
8039                                         break;
8040                                 case 2:
8041                                         /* XXX compute */
8042                                         break;
8043                                 }
8044                                 break;
8045                         case 1:
8046                                 switch (queue_id) {
8047                                 case 0:
8048                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8049                                         break;
8050                                 case 1:
8051                                         /* XXX compute */
8052                                         break;
8053                                 case 2:
8054                                         /* XXX compute */
8055                                         break;
8056                                 }
8057                                 break;
8058                         }
8059                         break;
8060                 case 230: /* thermal low to high */
8061                         DRM_DEBUG("IH: thermal low to high\n");
8062                         rdev->pm.dpm.thermal.high_to_low = false;
8063                         queue_thermal = true;
8064                         break;
8065                 case 231: /* thermal high to low */
8066                         DRM_DEBUG("IH: thermal high to low\n");
8067                         rdev->pm.dpm.thermal.high_to_low = true;
8068                         queue_thermal = true;
8069                         break;
8070                 case 233: /* GUI IDLE */
8071                         DRM_DEBUG("IH: GUI idle\n");
8072                         break;
8073                 case 241: /* SDMA Privileged inst */
8074                 case 247: /* SDMA Privileged inst */
8075                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8076                         /* XXX check the bitfield order! */
8077                         me_id = (ring_id & 0x3) >> 0;
8078                         queue_id = (ring_id & 0xc) >> 2;
8079                         switch (me_id) {
8080                         case 0:
8081                                 switch (queue_id) {
8082                                 case 0:
8083                                         queue_reset = true;
8084                                         break;
8085                                 case 1:
8086                                         /* XXX compute */
8087                                         queue_reset = true;
8088                                         break;
8089                                 case 2:
8090                                         /* XXX compute */
8091                                         queue_reset = true;
8092                                         break;
8093                                 }
8094                                 break;
8095                         case 1:
8096                                 switch (queue_id) {
8097                                 case 0:
8098                                         queue_reset = true;
8099                                         break;
8100                                 case 1:
8101                                         /* XXX compute */
8102                                         queue_reset = true;
8103                                         break;
8104                                 case 2:
8105                                         /* XXX compute */
8106                                         queue_reset = true;
8107                                         break;
8108                                 }
8109                                 break;
8110                         }
8111                         break;
8112                 default:
8113                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8114                         break;
8115                 }
8116
8117                 /* wptr/rptr are in bytes! */
8118                 rptr += 16;
8119                 rptr &= rdev->ih.ptr_mask;
8120                 WREG32(IH_RB_RPTR, rptr);
8121         }
8122         if (queue_dp)
8123                 schedule_work(&rdev->dp_work);
8124         if (queue_hotplug)
8125                 schedule_delayed_work(&rdev->hotplug_work, 0);
8126         if (queue_reset) {
8127                 rdev->needs_reset = true;
8128                 wake_up_all(&rdev->fence_queue);
8129         }
8130         if (queue_thermal)
8131                 schedule_work(&rdev->pm.dpm.thermal.work);
8132         rdev->ih.rptr = rptr;
8133         atomic_set(&rdev->ih.lock, 0);
8134
8135         /* make sure wptr hasn't changed while processing */
8136         wptr = cik_get_ih_wptr(rdev);
8137         if (wptr != rptr)
8138                 goto restart_ih;
8139
8140         return IRQ_HANDLED;
8141 }
8142
8143 /*
8144  * startup/shutdown callbacks
8145  */
8146 static void cik_uvd_init(struct radeon_device *rdev)
8147 {
8148         int r;
8149
8150         if (!rdev->has_uvd)
8151                 return;
8152
8153         r = radeon_uvd_init(rdev);
8154         if (r) {
8155                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8156                 /*
8157                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8158                  * to early fails cik_uvd_start() and thus nothing happens
8159                  * there. So it is pointless to try to go through that code
8160                  * hence why we disable uvd here.
8161                  */
8162                 rdev->has_uvd = 0;
8163                 return;
8164         }
8165         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8166         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8167 }
8168
8169 static void cik_uvd_start(struct radeon_device *rdev)
8170 {
8171         int r;
8172
8173         if (!rdev->has_uvd)
8174                 return;
8175
8176         r = radeon_uvd_resume(rdev);
8177         if (r) {
8178                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8179                 goto error;
8180         }
8181         r = uvd_v4_2_resume(rdev);
8182         if (r) {
8183                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8184                 goto error;
8185         }
8186         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8187         if (r) {
8188                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8189                 goto error;
8190         }
8191         return;
8192
8193 error:
8194         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8195 }
8196
8197 static void cik_uvd_resume(struct radeon_device *rdev)
8198 {
8199         struct radeon_ring *ring;
8200         int r;
8201
8202         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8203                 return;
8204
8205         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8206         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8207         if (r) {
8208                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8209                 return;
8210         }
8211         r = uvd_v1_0_init(rdev);
8212         if (r) {
8213                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8214                 return;
8215         }
8216 }
8217
8218 static void cik_vce_init(struct radeon_device *rdev)
8219 {
8220         int r;
8221
8222         if (!rdev->has_vce)
8223                 return;
8224
8225         r = radeon_vce_init(rdev);
8226         if (r) {
8227                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8228                 /*
8229                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8230                  * to early fails cik_vce_start() and thus nothing happens
8231                  * there. So it is pointless to try to go through that code
8232                  * hence why we disable vce here.
8233                  */
8234                 rdev->has_vce = 0;
8235                 return;
8236         }
8237         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8238         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8239         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8240         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8241 }
8242
8243 static void cik_vce_start(struct radeon_device *rdev)
8244 {
8245         int r;
8246
8247         if (!rdev->has_vce)
8248                 return;
8249
8250         r = radeon_vce_resume(rdev);
8251         if (r) {
8252                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8253                 goto error;
8254         }
8255         r = vce_v2_0_resume(rdev);
8256         if (r) {
8257                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8258                 goto error;
8259         }
8260         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8261         if (r) {
8262                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8263                 goto error;
8264         }
8265         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8266         if (r) {
8267                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8268                 goto error;
8269         }
8270         return;
8271
8272 error:
8273         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8274         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8275 }
8276
8277 static void cik_vce_resume(struct radeon_device *rdev)
8278 {
8279         struct radeon_ring *ring;
8280         int r;
8281
8282         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8283                 return;
8284
8285         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8286         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8287         if (r) {
8288                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8289                 return;
8290         }
8291         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8292         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8293         if (r) {
8294                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8295                 return;
8296         }
8297         r = vce_v1_0_init(rdev);
8298         if (r) {
8299                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8300                 return;
8301         }
8302 }
8303
8304 /**
8305  * cik_startup - program the asic to a functional state
8306  *
8307  * @rdev: radeon_device pointer
8308  *
8309  * Programs the asic to a functional state (CIK).
8310  * Called by cik_init() and cik_resume().
8311  * Returns 0 for success, error for failure.
8312  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load MC microcode here unless DPM already handled it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* save/restore register list differs per IGP family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we will use:
	 * GFX, both compute (CP1/CP2) and both SDMA rings
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable their own rings */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware needs the type-2 packet NOP; everything
	 * else uses the type-3 PACKET3_NOP
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE resume is best-effort; errors only logged inside */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8509
8510 /**
8511  * cik_resume - resume the asic to a functional state
8512  *
8513  * @rdev: radeon_device pointer
8514  *
8515  * Programs the asic to a functional state (CIK).
8516  * Called at resume.
8517  * Returns 0 for success, error for failure.
8518  */
8519 int cik_resume(struct radeon_device *rdev)
8520 {
8521         int r;
8522
8523         /* post card */
8524         atom_asic_init(rdev->mode_info.atom_context);
8525
8526         /* init golden registers */
8527         cik_init_golden_registers(rdev);
8528
8529         if (rdev->pm.pm_method == PM_METHOD_DPM)
8530                 radeon_pm_resume(rdev);
8531
8532         rdev->accel_working = true;
8533         r = cik_startup(rdev);
8534         if (r) {
8535                 DRM_ERROR("cik startup failed on resume\n");
8536                 rdev->accel_working = false;
8537                 return r;
8538         }
8539
8540         return r;
8541
8542 }
8543
8544 /**
8545  * cik_suspend - suspend the asic
8546  *
8547  * @rdev: radeon_device pointer
8548  *
8549  * Bring the chip into a state suitable for suspend (CIK).
8550  * Called at suspend.
8551  * Returns 0 for success.
8552  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop the users of the hardware first ... */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* ... then halt the CP and SDMA engines themselves */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating, then interrupts and GART */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8574
8575 /* Plan is to move initialization in that function and use
8576  * helper function so that radeon_device_init pretty much
8577  * do nothing more than calling asic specific function. This
8578  * should also allow to remove a bunch of callback function
8579  * like vram_info.
8580  */
8581 /**
8582  * cik_init - asic specific driver and hw init
8583  *
8584  * @rdev: radeon_device pointer
8585  *
8586  * Setup asic specific driver variables and program the hw
8587  * to a functional state (CIK).
8588  * Called at driver startup.
8589  * Returns 0 for success, errors for failure.
8590  */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is the CIK init
		 * path - text looks inherited from ni.c; the check is correct.
		 */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode; IGPs have no MC firmware, dGPUs also need mc_fw */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* software ring init: GFX (1MB) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* compute rings (1MB each) need a doorbell for their wptr */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	/* SDMA rings (256KB each) */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* optional engines; failures there just clear has_uvd/has_vce */
	cik_uvd_init(rdev);
	cik_vce_init(rdev);

	/* interrupt handler ring (64KB) */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* startup failed: tear down whatever cik_startup() set up
		 * and continue in non-accelerated mode
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		/* NOTE(review): "NI+" also looks inherited from the cayman
		 * code; the condition matches the dGPU firmware check above.
		 */
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8730
8731 /**
8732  * cik_fini - asic specific driver and hw fini
8733  *
8734  * @rdev: radeon_device pointer
8735  *
8736  * Tear down the asic specific driver variables and program the hw
8737  * to an idle state (CIK).
8738  * Called at driver unload.
8739  */
void cik_fini(struct radeon_device *rdev)
{
	/* roughly the reverse of cik_init()/cik_startup(): stop the engines
	 * first, then release the buffers and software state they used
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD/VCE teardown is unconditional here (no has_uvd/has_vce check,
	 * unlike cik_suspend()); the fini helpers tolerate uninitialized state
	 */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8766
8767 void dce8_program_fmt(struct drm_encoder *encoder)
8768 {
8769         struct drm_device *dev = encoder->dev;
8770         struct radeon_device *rdev = dev->dev_private;
8771         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8772         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8773         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8774         int bpc = 0;
8775         u32 tmp = 0;
8776         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8777
8778         if (connector) {
8779                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8780                 bpc = radeon_get_monitor_bpc(connector);
8781                 dither = radeon_connector->dither;
8782         }
8783
8784         /* LVDS/eDP FMT is set up by atom */
8785         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8786                 return;
8787
8788         /* not needed for analog */
8789         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8790             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8791                 return;
8792
8793         if (bpc == 0)
8794                 return;
8795
8796         switch (bpc) {
8797         case 6:
8798                 if (dither == RADEON_FMT_DITHER_ENABLE)
8799                         /* XXX sort out optimal dither settings */
8800                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8801                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8802                 else
8803                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8804                 break;
8805         case 8:
8806                 if (dither == RADEON_FMT_DITHER_ENABLE)
8807                         /* XXX sort out optimal dither settings */
8808                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8809                                 FMT_RGB_RANDOM_ENABLE |
8810                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8811                 else
8812                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8813                 break;
8814         case 10:
8815                 if (dither == RADEON_FMT_DITHER_ENABLE)
8816                         /* XXX sort out optimal dither settings */
8817                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818                                 FMT_RGB_RANDOM_ENABLE |
8819                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8820                 else
8821                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8822                 break;
8823         default:
8824                 /* not needed */
8825                 break;
8826         }
8827
8828         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8829 }
8830
8831 /* display watermark setup */
8832 /**
8833  * dce8_line_buffer_adjust - Set up the line buffer
8834  *
8835  * @rdev: radeon_device pointer
8836  * @radeon_crtc: the selected display controller
8837  * @mode: the current display mode on the selected display
8838  * controller
8839  *
8840  * Setup up the line buffer allocation for
8841  * the selected display controller (CIK).
8842  * Returns the line buffer size in pixels.
8843  */
8844 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8845                                    struct radeon_crtc *radeon_crtc,
8846                                    struct drm_display_mode *mode)
8847 {
8848         u32 tmp, buffer_alloc, i;
8849         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8850         /*
8851          * Line Buffer Setup
8852          * There are 6 line buffers, one for each display controllers.
8853          * There are 3 partitions per LB. Select the number of partitions
8854          * to enable based on the display width.  For display widths larger
8855          * than 4096, you need use to use 2 display controllers and combine
8856          * them using the stereo blender.
8857          */
8858         if (radeon_crtc->base.enabled && mode) {
8859                 if (mode->crtc_hdisplay < 1920) {
8860                         tmp = 1;
8861                         buffer_alloc = 2;
8862                 } else if (mode->crtc_hdisplay < 2560) {
8863                         tmp = 2;
8864                         buffer_alloc = 2;
8865                 } else if (mode->crtc_hdisplay < 4096) {
8866                         tmp = 0;
8867                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8868                 } else {
8869                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8870                         tmp = 0;
8871                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872                 }
8873         } else {
8874                 tmp = 1;
8875                 buffer_alloc = 0;
8876         }
8877
8878         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8879                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8880
8881         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8882                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8883         for (i = 0; i < rdev->usec_timeout; i++) {
8884                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8885                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8886                         break;
8887                 udelay(1);
8888         }
8889
8890         if (radeon_crtc->base.enabled && mode) {
8891                 switch (tmp) {
8892                 case 0:
8893                 default:
8894                         return 4096 * 2;
8895                 case 1:
8896                         return 1920 * 2;
8897                 case 2:
8898                         return 2560 * 2;
8899                 }
8900         }
8901
8902         /* controller not enabled, so no lb used */
8903         return 0;
8904 }
8905
8906 /**
8907  * cik_get_number_of_dram_channels - get the number of dram channels
8908  *
8909  * @rdev: radeon_device pointer
8910  *
8911  * Look up the number of video ram channels (CIK).
8912  * Used for display watermark bandwidth calculations
8913  * Returns the number of dram channels
8914  */
8915 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8916 {
8917         u32 tmp = RREG32(MC_SHARED_CHMAP);
8918
8919         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8920         case 0:
8921         default:
8922                 return 1;
8923         case 1:
8924                 return 2;
8925         case 2:
8926                 return 4;
8927         case 3:
8928                 return 8;
8929         case 4:
8930                 return 3;
8931         case 5:
8932                 return 6;
8933         case 6:
8934                 return 10;
8935         case 7:
8936                 return 12;
8937         case 8:
8938                 return 16;
8939         }
8940 }
8941
/* dce8_wm_params - snapshot of per-head parameters fed to the DCE8
 * display watermark formulas below (bandwidth and latency checks).
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8957
8958 /**
8959  * dce8_dram_bandwidth - get the dram bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the raw dram bandwidth (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the dram bandwidth in MBytes/s
8966  */
8967 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8968 {
8969         /* Calculate raw DRAM Bandwidth */
8970         fixed20_12 dram_efficiency; /* 0.7 */
8971         fixed20_12 yclk, dram_channels, bandwidth;
8972         fixed20_12 a;
8973
8974         a.full = dfixed_const(1000);
8975         yclk.full = dfixed_const(wm->yclk);
8976         yclk.full = dfixed_div(yclk, a);
8977         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8978         a.full = dfixed_const(10);
8979         dram_efficiency.full = dfixed_const(7);
8980         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8981         bandwidth.full = dfixed_mul(dram_channels, yclk);
8982         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8983
8984         return dfixed_trunc(bandwidth);
8985 }
8986
8987 /**
8988  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dram bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dram bandwidth for display in MBytes/s
8995  */
8996 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8997 {
8998         /* Calculate DRAM Bandwidth and the part allocated to display. */
8999         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9000         fixed20_12 yclk, dram_channels, bandwidth;
9001         fixed20_12 a;
9002
9003         a.full = dfixed_const(1000);
9004         yclk.full = dfixed_const(wm->yclk);
9005         yclk.full = dfixed_div(yclk, a);
9006         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9007         a.full = dfixed_const(10);
9008         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9009         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9010         bandwidth.full = dfixed_mul(dram_channels, yclk);
9011         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9012
9013         return dfixed_trunc(bandwidth);
9014 }
9015
9016 /**
9017  * dce8_data_return_bandwidth - get the data return bandwidth
9018  *
9019  * @wm: watermark calculation data
9020  *
9021  * Calculate the data return bandwidth used for display (CIK).
9022  * Used for display watermark bandwidth calculations
9023  * Returns the data return bandwidth in MBytes/s
9024  */
9025 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9026 {
9027         /* Calculate the display Data return Bandwidth */
9028         fixed20_12 return_efficiency; /* 0.8 */
9029         fixed20_12 sclk, bandwidth;
9030         fixed20_12 a;
9031
9032         a.full = dfixed_const(1000);
9033         sclk.full = dfixed_const(wm->sclk);
9034         sclk.full = dfixed_div(sclk, a);
9035         a.full = dfixed_const(10);
9036         return_efficiency.full = dfixed_const(8);
9037         return_efficiency.full = dfixed_div(return_efficiency, a);
9038         a.full = dfixed_const(32);
9039         bandwidth.full = dfixed_mul(a, sclk);
9040         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9041
9042         return dfixed_trunc(bandwidth);
9043 }
9044
9045 /**
9046  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9047  *
9048  * @wm: watermark calculation data
9049  *
9050  * Calculate the dmif bandwidth used for display (CIK).
9051  * Used for display watermark bandwidth calculations
9052  * Returns the dmif bandwidth in MBytes/s
9053  */
9054 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9055 {
9056         /* Calculate the DMIF Request Bandwidth */
9057         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9058         fixed20_12 disp_clk, bandwidth;
9059         fixed20_12 a, b;
9060
9061         a.full = dfixed_const(1000);
9062         disp_clk.full = dfixed_const(wm->disp_clk);
9063         disp_clk.full = dfixed_div(disp_clk, a);
9064         a.full = dfixed_const(32);
9065         b.full = dfixed_mul(a, disp_clk);
9066
9067         a.full = dfixed_const(10);
9068         disp_clk_request_efficiency.full = dfixed_const(8);
9069         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9070
9071         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9072
9073         return dfixed_trunc(bandwidth);
9074 }
9075
9076 /**
9077  * dce8_available_bandwidth - get the min available bandwidth
9078  *
9079  * @wm: watermark calculation data
9080  *
9081  * Calculate the min available bandwidth used for display (CIK).
9082  * Used for display watermark bandwidth calculations
9083  * Returns the min available bandwidth in MBytes/s
9084  */
9085 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9086 {
9087         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9088         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9089         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9090         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9091
9092         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9093 }
9094
9095 /**
9096  * dce8_average_bandwidth - get the average available bandwidth
9097  *
9098  * @wm: watermark calculation data
9099  *
9100  * Calculate the average available bandwidth used for display (CIK).
9101  * Used for display watermark bandwidth calculations
9102  * Returns the average available bandwidth in MBytes/s
9103  */
9104 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9105 {
9106         /* Calculate the display mode Average Bandwidth
9107          * DisplayMode should contain the source and destination dimensions,
9108          * timing, etc.
9109          */
9110         fixed20_12 bpp;
9111         fixed20_12 line_time;
9112         fixed20_12 src_width;
9113         fixed20_12 bandwidth;
9114         fixed20_12 a;
9115
9116         a.full = dfixed_const(1000);
9117         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9118         line_time.full = dfixed_div(line_time, a);
9119         bpp.full = dfixed_const(wm->bytes_per_pixel);
9120         src_width.full = dfixed_const(wm->src_width);
9121         bandwidth.full = dfixed_mul(src_width, bpp);
9122         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9123         bandwidth.full = dfixed_div(bandwidth, line_time);
9124
9125         return dfixed_trunc(bandwidth);
9126 }
9127
9128 /**
9129  * dce8_latency_watermark - get the latency watermark
9130  *
9131  * @wm: watermark calculation data
9132  *
9133  * Calculate the latency watermark (CIK).
9134  * Used for display watermark bandwidth calculations
9135  * Returns the latency watermark in ns
9136  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for a 512B*8 chunk / a 128B*4 cursor line pair to return */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	/* a/b/c are scratch fixed-point registers, reused sequentially
	 * below — the statement order is load-bearing */
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavier downscaling / more vtaps / interlace needs more source
	 * lines fetched per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's even share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the
	 * DMIF-limited fill rate */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* line buffer fill is also capped at disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data at
	 * lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the fill overruns the active period, the overrun adds to the
	 * watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9199
9200 /**
9201  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9202  * average and available dram bandwidth
9203  *
9204  * @wm: watermark calculation data
9205  *
9206  * Check if the display average bandwidth fits in the display
9207  * dram bandwidth (CIK).
9208  * Used for display watermark bandwidth calculations
9209  * Returns true if the display fits, false if not.
9210  */
9211 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9212 {
9213         if (dce8_average_bandwidth(wm) <=
9214             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9215                 return true;
9216         else
9217                 return false;
9218 }
9219
9220 /**
9221  * dce8_average_bandwidth_vs_available_bandwidth - check
9222  * average and available bandwidth
9223  *
9224  * @wm: watermark calculation data
9225  *
9226  * Check if the display average bandwidth fits in the display
9227  * available bandwidth (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns true if the display fits, false if not.
9230  */
9231 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9232 {
9233         if (dce8_average_bandwidth(wm) <=
9234             (dce8_available_bandwidth(wm) / wm->num_heads))
9235                 return true;
9236         else
9237                 return false;
9238 }
9239
9240 /**
9241  * dce8_check_latency_hiding - check latency hiding
9242  *
9243  * @wm: watermark calculation data
9244  *
9245  * Check latency hiding (CIK).
9246  * Used for display watermark bandwidth calculations
9247  * Returns true if the display fits, false if not.
9248  */
9249 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9250 {
9251         u32 lb_partitions = wm->lb_size / wm->src_width;
9252         u32 line_time = wm->active_time + wm->blank_time;
9253         u32 latency_tolerant_lines;
9254         u32 latency_hiding;
9255         fixed20_12 a;
9256
9257         a.full = dfixed_const(1);
9258         if (wm->vsc.full > a.full)
9259                 latency_tolerant_lines = 1;
9260         else {
9261                 if (lb_partitions <= (wm->vtaps + 1))
9262                         latency_tolerant_lines = 1;
9263                 else
9264                         latency_tolerant_lines = 2;
9265         }
9266
9267         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9268
9269         if (dce8_latency_watermark(wm) <= latency_hiding)
9270                 return true;
9271         else
9272                 return false;
9273 }
9274
9275 /**
9276  * dce8_program_watermarks - program display watermarks
9277  *
9278  * @rdev: radeon_device pointer
9279  * @radeon_crtc: the selected display controller
9280  * @lb_size: line buffer size
9281  * @num_heads: number of display controllers in use
9282  *
9283  * Calculate and program the display watermarks for the
9284  * selected display controller (CIK).
9285  */
9286 static void dce8_program_watermarks(struct radeon_device *rdev,
9287                                     struct radeon_crtc *radeon_crtc,
9288                                     u32 lb_size, u32 num_heads)
9289 {
9290         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9291         struct dce8_wm_params wm_low, wm_high;
9292         u32 pixel_period;
9293         u32 line_time = 0;
9294         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9295         u32 tmp, wm_mask;
9296
9297         if (radeon_crtc->base.enabled && num_heads && mode) {
9298                 pixel_period = 1000000 / (u32)mode->clock;
9299                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9300
9301                 /* watermark for high clocks */
9302                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9303                     rdev->pm.dpm_enabled) {
9304                         wm_high.yclk =
9305                                 radeon_dpm_get_mclk(rdev, false) * 10;
9306                         wm_high.sclk =
9307                                 radeon_dpm_get_sclk(rdev, false) * 10;
9308                 } else {
9309                         wm_high.yclk = rdev->pm.current_mclk * 10;
9310                         wm_high.sclk = rdev->pm.current_sclk * 10;
9311                 }
9312
9313                 wm_high.disp_clk = mode->clock;
9314                 wm_high.src_width = mode->crtc_hdisplay;
9315                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9316                 wm_high.blank_time = line_time - wm_high.active_time;
9317                 wm_high.interlaced = false;
9318                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9319                         wm_high.interlaced = true;
9320                 wm_high.vsc = radeon_crtc->vsc;
9321                 wm_high.vtaps = 1;
9322                 if (radeon_crtc->rmx_type != RMX_OFF)
9323                         wm_high.vtaps = 2;
9324                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9325                 wm_high.lb_size = lb_size;
9326                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9327                 wm_high.num_heads = num_heads;
9328
9329                 /* set for high clocks */
9330                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9331
9332                 /* possibly force display priority to high */
9333                 /* should really do this at mode validation time... */
9334                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9335                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9336                     !dce8_check_latency_hiding(&wm_high) ||
9337                     (rdev->disp_priority == 2)) {
9338                         DRM_DEBUG_KMS("force priority to high\n");
9339                 }
9340
9341                 /* watermark for low clocks */
9342                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9343                     rdev->pm.dpm_enabled) {
9344                         wm_low.yclk =
9345                                 radeon_dpm_get_mclk(rdev, true) * 10;
9346                         wm_low.sclk =
9347                                 radeon_dpm_get_sclk(rdev, true) * 10;
9348                 } else {
9349                         wm_low.yclk = rdev->pm.current_mclk * 10;
9350                         wm_low.sclk = rdev->pm.current_sclk * 10;
9351                 }
9352
9353                 wm_low.disp_clk = mode->clock;
9354                 wm_low.src_width = mode->crtc_hdisplay;
9355                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9356                 wm_low.blank_time = line_time - wm_low.active_time;
9357                 wm_low.interlaced = false;
9358                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9359                         wm_low.interlaced = true;
9360                 wm_low.vsc = radeon_crtc->vsc;
9361                 wm_low.vtaps = 1;
9362                 if (radeon_crtc->rmx_type != RMX_OFF)
9363                         wm_low.vtaps = 2;
9364                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9365                 wm_low.lb_size = lb_size;
9366                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9367                 wm_low.num_heads = num_heads;
9368
9369                 /* set for low clocks */
9370                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9371
9372                 /* possibly force display priority to high */
9373                 /* should really do this at mode validation time... */
9374                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9375                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9376                     !dce8_check_latency_hiding(&wm_low) ||
9377                     (rdev->disp_priority == 2)) {
9378                         DRM_DEBUG_KMS("force priority to high\n");
9379                 }
9380
9381                 /* Save number of lines the linebuffer leads before the scanout */
9382                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9383         }
9384
9385         /* select wm A */
9386         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9387         tmp = wm_mask;
9388         tmp &= ~LATENCY_WATERMARK_MASK(3);
9389         tmp |= LATENCY_WATERMARK_MASK(1);
9390         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9391         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9392                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9393                 LATENCY_HIGH_WATERMARK(line_time)));
9394         /* select wm B */
9395         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9396         tmp &= ~LATENCY_WATERMARK_MASK(3);
9397         tmp |= LATENCY_WATERMARK_MASK(2);
9398         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9399         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9400                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9401                 LATENCY_HIGH_WATERMARK(line_time)));
9402         /* restore original selection */
9403         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9404
9405         /* save values for DPM */
9406         radeon_crtc->line_time = line_time;
9407         radeon_crtc->wm_high = latency_watermark_a;
9408         radeon_crtc->wm_low = latency_watermark_b;
9409 }
9410
9411 /**
9412  * dce8_bandwidth_update - program display watermarks
9413  *
9414  * @rdev: radeon_device pointer
9415  *
9416  * Calculate and program the display watermarks and line
9417  * buffer allocation (CIK).
9418  */
9419 void dce8_bandwidth_update(struct radeon_device *rdev)
9420 {
9421         struct drm_display_mode *mode = NULL;
9422         u32 num_heads = 0, lb_size;
9423         int i;
9424
9425         if (!rdev->mode_info.mode_config_initialized)
9426                 return;
9427
9428         radeon_update_display_priority(rdev);
9429
9430         for (i = 0; i < rdev->num_crtc; i++) {
9431                 if (rdev->mode_info.crtcs[i]->base.enabled)
9432                         num_heads++;
9433         }
9434         for (i = 0; i < rdev->num_crtc; i++) {
9435                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9436                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9437                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9438         }
9439 }
9440
9441 /**
9442  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9443  *
9444  * @rdev: radeon_device pointer
9445  *
9446  * Fetches a GPU clock counter snapshot (SI).
9447  * Returns the 64 bit clock counter snapshot.
9448  */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* mutex serializes the capture/read sequence against other callers */
	mutex_lock(&rdev->gpu_clock_mutex);
	/* writing the capture register latches the counter into the
	 * LSB/MSB registers read below */
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
9460
9461 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9462                              u32 cntl_reg, u32 status_reg)
9463 {
9464         int r, i;
9465         struct atom_clock_dividers dividers;
9466         uint32_t tmp;
9467
9468         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9469                                            clock, false, &dividers);
9470         if (r)
9471                 return r;
9472
9473         tmp = RREG32_SMC(cntl_reg);
9474         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9475         tmp |= dividers.post_divider;
9476         WREG32_SMC(cntl_reg, tmp);
9477
9478         for (i = 0; i < 100; i++) {
9479                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9480                         break;
9481                 mdelay(10);
9482         }
9483         if (i == 100)
9484                 return -ETIMEDOUT;
9485
9486         return 0;
9487 }
9488
9489 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9490 {
9491         int r = 0;
9492
9493         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9494         if (r)
9495                 return r;
9496
9497         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9498         return r;
9499 }
9500
9501 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9502 {
9503         int r, i;
9504         struct atom_clock_dividers dividers;
9505         u32 tmp;
9506
9507         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9508                                            ecclk, false, &dividers);
9509         if (r)
9510                 return r;
9511
9512         for (i = 0; i < 100; i++) {
9513                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9514                         break;
9515                 mdelay(10);
9516         }
9517         if (i == 100)
9518                 return -ETIMEDOUT;
9519
9520         tmp = RREG32_SMC(CG_ECLK_CNTL);
9521         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9522         tmp |= dividers.post_divider;
9523         WREG32_SMC(CG_ECLK_CNTL, tmp);
9524
9525         for (i = 0; i < 100; i++) {
9526                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9527                         break;
9528                 mdelay(10);
9529         }
9530         if (i == 100)
9531                 return -ETIMEDOUT;
9532
9533         return 0;
9534 }
9535
/**
 * cik_pcie_gen3_enable - retrain the PCIe link at gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * Bumps the PCIe link of a discrete (PCIE, non-IGP) GPU to the fastest
 * rate both link partners support: gen3 when the platform speed mask
 * reports DRM_PCIE_SPEED_80, otherwise gen2 for DRM_PCIE_SPEED_50.
 * Returns early when the requested rate is already in effect or when
 * the user disabled retraining with radeon.pcie_gen2=0.  For gen3 the
 * link equalization sequence is re-run (up to 10 attempts) before the
 * software-initiated speed change is kicked off via PCIE_LC_SPEED_CNTL.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
        struct pci_dev *root = rdev->pdev->bus->self;
        int bridge_pos, gpu_pos;
        u32 speed_cntl, mask, current_data_rate;
        int ret, i;
        u16 tmp16;

        /* no upstream bridge to train against */
        if (pci_is_root_bus(rdev->pdev->bus))
                return;

        /* user asked to stay at the boot-time link speed */
        if (radeon_pcie_gen2 == 0)
                return;

        /* IGPs have no discrete PCIe link */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* which speeds does the platform's root port support? */
        ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
        if (ret != 0)
                return;

        /* nothing faster than gen1 available - nothing to do */
        if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
                return;

        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        /* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
        if (mask & DRM_PCIE_SPEED_80) {
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
        } else if (mask & DRM_PCIE_SPEED_50) {
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
        }

        /* need the PCIe capability offsets of both link partners */
        bridge_pos = pci_pcie_cap(root);
        if (!bridge_pos)
                return;

        gpu_pos = pci_pcie_cap(rdev->pdev);
        if (!gpu_pos)
                return;

        if (mask & DRM_PCIE_SPEED_80) {
                /* re-try equalization if gen3 is not already enabled */
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
                        u32 max_lw, current_lw, tmp;

                        /* snapshot LNKCTL on both ends so the HAWD
                         * (hw autonomous width disable) bit can be
                         * restored after the retrain */
                        pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                        tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                        tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
                        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                        /* widen the link back up if it trained narrower
                         * than the detected maximum width */
                        tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
                        current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

                        if (current_lw < max_lw) {
                                tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                                if (tmp & LC_RENEGOTIATION_SUPPORT) {
                                        tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
                                        tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
                                        tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
                                        WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
                                }
                        }

                        /* up to 10 equalization attempts */
                        for (i = 0; i < 10; i++) {
                                /* check status; stop retrying once a
                                 * transaction is pending on the GPU */
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
                                if (tmp16 & PCI_EXP_DEVSTA_TRPND)
                                        break;

                                /* re-snapshot both link partners' LNKCTL
                                 * and LNKCTL2 for restoration below */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

                                /* quiesce the link ... */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                /* ... and ask the hw to redo equalization */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp |= LC_REDO_EQ;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

                                mdelay(100);

                                /* linkctl: restore only the saved HAWD bit */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
                                tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
                                tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

                                /* linkctl2: restore bit 4 and bits 9..11
                                 * (compliance/transmit-margin related fields
                                 * of Link Control 2 - see PCIe spec) */
                                pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

                                pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
                                tmp16 &= ~((1 << 4) | (7 << 9));
                                tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
                                pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

                                /* release the quiesce */
                                tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
                                tmp &= ~LC_SET_QUIESCE;
                                WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
                        }
                }
        }

        /* set the link speed */
        speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
        speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        /* target link speed lives in the low 4 bits of LNKCTL2 */
        pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
        if (mask & DRM_PCIE_SPEED_80)
                tmp16 |= 3; /* gen3 */
        else if (mask & DRM_PCIE_SPEED_50)
                tmp16 |= 2; /* gen2 */
        else
                tmp16 |= 1; /* gen1 */
        pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

        /* kick off the sw speed change and wait for the hw to ack it */
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
        WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

        for (i = 0; i < rdev->usec_timeout; i++) {
                speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
                if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
                        break;
                udelay(1);
        }
}
9695
/**
 * cik_program_aspm - program PCIe ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Enables L0s/L1 link power states on discrete (PCIE, non-IGP) parts
 * unless the user disabled ASPM with radeon.aspm=0.  Also powers down
 * the PCIe pad PLLs while the link is in L1 and, when the root port
 * advertises clock power management (PCI_EXP_LNKCAP_CLKPM), reroutes
 * several internal clocks off the PCIe reference clock so it can be
 * gated.  The local disable_* booleans are compile-time policy knobs;
 * all default to false (everything enabled).
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        /* user disabled ASPM via the module parameter */
        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* override the number of fast training sequences the
         * transmitter advertises (0x24) */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* build the L0s/L1 inactivity configuration; only written to
         * the hw below, once the L1 decision is made */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                /* enable L1 entry and allow PMI-triggered L1 */
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* power down the pad PLLs (both PB0 and PB1
                         * pads, lanes 0 and 1) while in L1/TXS2 */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /* CLKREQ usable only if the upstream bridge
                         * advertises clock power management */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* move thermal monitor clocks off the
                                 * (soon to be gated) refclk */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                /* don't source xclk from bclk */
                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                /* let the BIF refclk be gated */
                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: write back only the L0s/PMI settings */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        /* enable light sleep for the BIF memories */
        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* back out L0s if the link partner's N_FTS field is
                 * saturated and the lanes trained reversed in both
                 * directions - presumably an unreliable-L0s case;
                 * NOTE(review): confirm against hw docs */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}