/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

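/*
 * Note: both the legacy upper-case and the newer lower-case firmware image
 * names are listed below; cik_init_microcode() tries the new-style files
 * first and falls back to the legacy ones if they are not available.
 */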
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);

/**
 * ci_get_temp - get GPU temperature (CI asics)
 *
 * @rdev: radeon_device pointer
 *
 * Reads the current temperature from the SMC thermal status
 * register and returns it in millidegrees Celsius.
 */
int ci_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}

/**
 * kv_get_temp - get GPU temperature (KV/KB asics)
 *
 * @rdev: radeon_device pointer
 *
 * Reads the current temperature from the SMC thermal sensor
 * and returns it in millidegrees Celsius.
 */
int kv_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = RREG32_SMC(0xC0300E0C);

        if (temp)
                actual_temp = (temp / 8) - 49;
        else
                actual_temp = 0;

        actual_temp = actual_temp * 1000;

        return actual_temp;
}

/*
 * Indirect register accessors: PCIE port registers are reached through
 * the PCIE_INDEX/PCIE_DATA pair, serialized by pciep_idx_lock.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 r;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
        spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
207
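/*
 * Register lists used when programming the RLC save/restore buffer (set up
 * via sumo_rlc_init()).  Each entry packs a selector in the upper 16 bits
 * (apparently an SE/SH/instance select) with a register dword offset
 * (reg >> 2) in the lower bits; most entries are followed by a zero
 * data slot.
 */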
208 static const u32 spectre_rlc_save_restore_register_list[] =
209 {
210         (0x0e00 << 16) | (0xc12c >> 2),
211         0x00000000,
212         (0x0e00 << 16) | (0xc140 >> 2),
213         0x00000000,
214         (0x0e00 << 16) | (0xc150 >> 2),
215         0x00000000,
216         (0x0e00 << 16) | (0xc15c >> 2),
217         0x00000000,
218         (0x0e00 << 16) | (0xc168 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc170 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc178 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc204 >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc2b4 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc2b8 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc2bc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc2c0 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0x8228 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0x829c >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0x869c >> 2),
239         0x00000000,
240         (0x0600 << 16) | (0x98f4 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0x98f8 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0x9900 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc260 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0x90e8 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0x3c000 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0x3c00c >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0x8c1c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0x9700 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xcd20 >> 2),
259         0x00000000,
260         (0x4e00 << 16) | (0xcd20 >> 2),
261         0x00000000,
262         (0x5e00 << 16) | (0xcd20 >> 2),
263         0x00000000,
264         (0x6e00 << 16) | (0xcd20 >> 2),
265         0x00000000,
266         (0x7e00 << 16) | (0xcd20 >> 2),
267         0x00000000,
268         (0x8e00 << 16) | (0xcd20 >> 2),
269         0x00000000,
270         (0x9e00 << 16) | (0xcd20 >> 2),
271         0x00000000,
272         (0xae00 << 16) | (0xcd20 >> 2),
273         0x00000000,
274         (0xbe00 << 16) | (0xcd20 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x89bc >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x8900 >> 2),
279         0x00000000,
280         0x3,
281         (0x0e00 << 16) | (0xc130 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc134 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc1fc >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc208 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc264 >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc268 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0xc26c >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0xc270 >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0xc274 >> 2),
298         0x00000000,
299         (0x0e00 << 16) | (0xc278 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0xc27c >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0xc280 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc284 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0xc288 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0xc28c >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0xc290 >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0xc294 >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0xc298 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xc29c >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xc2a0 >> 2),
320         0x00000000,
321         (0x0e00 << 16) | (0xc2a4 >> 2),
322         0x00000000,
323         (0x0e00 << 16) | (0xc2a8 >> 2),
324         0x00000000,
325         (0x0e00 << 16) | (0xc2ac  >> 2),
326         0x00000000,
327         (0x0e00 << 16) | (0xc2b0 >> 2),
328         0x00000000,
329         (0x0e00 << 16) | (0x301d0 >> 2),
330         0x00000000,
331         (0x0e00 << 16) | (0x30238 >> 2),
332         0x00000000,
333         (0x0e00 << 16) | (0x30250 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x30254 >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x30258 >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0x3025c >> 2),
340         0x00000000,
341         (0x4e00 << 16) | (0xc900 >> 2),
342         0x00000000,
343         (0x5e00 << 16) | (0xc900 >> 2),
344         0x00000000,
345         (0x6e00 << 16) | (0xc900 >> 2),
346         0x00000000,
347         (0x7e00 << 16) | (0xc900 >> 2),
348         0x00000000,
349         (0x8e00 << 16) | (0xc900 >> 2),
350         0x00000000,
351         (0x9e00 << 16) | (0xc900 >> 2),
352         0x00000000,
353         (0xae00 << 16) | (0xc900 >> 2),
354         0x00000000,
355         (0xbe00 << 16) | (0xc900 >> 2),
356         0x00000000,
357         (0x4e00 << 16) | (0xc904 >> 2),
358         0x00000000,
359         (0x5e00 << 16) | (0xc904 >> 2),
360         0x00000000,
361         (0x6e00 << 16) | (0xc904 >> 2),
362         0x00000000,
363         (0x7e00 << 16) | (0xc904 >> 2),
364         0x00000000,
365         (0x8e00 << 16) | (0xc904 >> 2),
366         0x00000000,
367         (0x9e00 << 16) | (0xc904 >> 2),
368         0x00000000,
369         (0xae00 << 16) | (0xc904 >> 2),
370         0x00000000,
371         (0xbe00 << 16) | (0xc904 >> 2),
372         0x00000000,
373         (0x4e00 << 16) | (0xc908 >> 2),
374         0x00000000,
375         (0x5e00 << 16) | (0xc908 >> 2),
376         0x00000000,
377         (0x6e00 << 16) | (0xc908 >> 2),
378         0x00000000,
379         (0x7e00 << 16) | (0xc908 >> 2),
380         0x00000000,
381         (0x8e00 << 16) | (0xc908 >> 2),
382         0x00000000,
383         (0x9e00 << 16) | (0xc908 >> 2),
384         0x00000000,
385         (0xae00 << 16) | (0xc908 >> 2),
386         0x00000000,
387         (0xbe00 << 16) | (0xc908 >> 2),
388         0x00000000,
389         (0x4e00 << 16) | (0xc90c >> 2),
390         0x00000000,
391         (0x5e00 << 16) | (0xc90c >> 2),
392         0x00000000,
393         (0x6e00 << 16) | (0xc90c >> 2),
394         0x00000000,
395         (0x7e00 << 16) | (0xc90c >> 2),
396         0x00000000,
397         (0x8e00 << 16) | (0xc90c >> 2),
398         0x00000000,
399         (0x9e00 << 16) | (0xc90c >> 2),
400         0x00000000,
401         (0xae00 << 16) | (0xc90c >> 2),
402         0x00000000,
403         (0xbe00 << 16) | (0xc90c >> 2),
404         0x00000000,
405         (0x4e00 << 16) | (0xc910 >> 2),
406         0x00000000,
407         (0x5e00 << 16) | (0xc910 >> 2),
408         0x00000000,
409         (0x6e00 << 16) | (0xc910 >> 2),
410         0x00000000,
411         (0x7e00 << 16) | (0xc910 >> 2),
412         0x00000000,
413         (0x8e00 << 16) | (0xc910 >> 2),
414         0x00000000,
415         (0x9e00 << 16) | (0xc910 >> 2),
416         0x00000000,
417         (0xae00 << 16) | (0xc910 >> 2),
418         0x00000000,
419         (0xbe00 << 16) | (0xc910 >> 2),
420         0x00000000,
421         (0x0e00 << 16) | (0xc99c >> 2),
422         0x00000000,
423         (0x0e00 << 16) | (0x9834 >> 2),
424         0x00000000,
425         (0x0000 << 16) | (0x30f00 >> 2),
426         0x00000000,
427         (0x0001 << 16) | (0x30f00 >> 2),
428         0x00000000,
429         (0x0000 << 16) | (0x30f04 >> 2),
430         0x00000000,
431         (0x0001 << 16) | (0x30f04 >> 2),
432         0x00000000,
433         (0x0000 << 16) | (0x30f08 >> 2),
434         0x00000000,
435         (0x0001 << 16) | (0x30f08 >> 2),
436         0x00000000,
437         (0x0000 << 16) | (0x30f0c >> 2),
438         0x00000000,
439         (0x0001 << 16) | (0x30f0c >> 2),
440         0x00000000,
441         (0x0600 << 16) | (0x9b7c >> 2),
442         0x00000000,
443         (0x0e00 << 16) | (0x8a14 >> 2),
444         0x00000000,
445         (0x0e00 << 16) | (0x8a18 >> 2),
446         0x00000000,
447         (0x0600 << 16) | (0x30a00 >> 2),
448         0x00000000,
449         (0x0e00 << 16) | (0x8bf0 >> 2),
450         0x00000000,
451         (0x0e00 << 16) | (0x8bcc >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x8b24 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x30a04 >> 2),
456         0x00000000,
457         (0x0600 << 16) | (0x30a10 >> 2),
458         0x00000000,
459         (0x0600 << 16) | (0x30a14 >> 2),
460         0x00000000,
461         (0x0600 << 16) | (0x30a18 >> 2),
462         0x00000000,
463         (0x0600 << 16) | (0x30a2c >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0xc700 >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0xc704 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0xc708 >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0xc768 >> 2),
472         0x00000000,
473         (0x0400 << 16) | (0xc770 >> 2),
474         0x00000000,
475         (0x0400 << 16) | (0xc774 >> 2),
476         0x00000000,
477         (0x0400 << 16) | (0xc778 >> 2),
478         0x00000000,
479         (0x0400 << 16) | (0xc77c >> 2),
480         0x00000000,
481         (0x0400 << 16) | (0xc780 >> 2),
482         0x00000000,
483         (0x0400 << 16) | (0xc784 >> 2),
484         0x00000000,
485         (0x0400 << 16) | (0xc788 >> 2),
486         0x00000000,
487         (0x0400 << 16) | (0xc78c >> 2),
488         0x00000000,
489         (0x0400 << 16) | (0xc798 >> 2),
490         0x00000000,
491         (0x0400 << 16) | (0xc79c >> 2),
492         0x00000000,
493         (0x0400 << 16) | (0xc7a0 >> 2),
494         0x00000000,
495         (0x0400 << 16) | (0xc7a4 >> 2),
496         0x00000000,
497         (0x0400 << 16) | (0xc7a8 >> 2),
498         0x00000000,
499         (0x0400 << 16) | (0xc7ac >> 2),
500         0x00000000,
501         (0x0400 << 16) | (0xc7b0 >> 2),
502         0x00000000,
503         (0x0400 << 16) | (0xc7b4 >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0x9100 >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0x3c010 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0x92a8 >> 2),
510         0x00000000,
511         (0x0e00 << 16) | (0x92ac >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0x92b4 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0x92b8 >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0x92bc >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0x92c0 >> 2),
520         0x00000000,
521         (0x0e00 << 16) | (0x92c4 >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0x92c8 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0x92cc >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x92d0 >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x8c00 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0x8c04 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0x8c20 >> 2),
534         0x00000000,
535         (0x0e00 << 16) | (0x8c38 >> 2),
536         0x00000000,
537         (0x0e00 << 16) | (0x8c3c >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0xae00 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x9604 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0xac08 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0xac0c >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0xac10 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0xac14 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0xac58 >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0xac68 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0xac6c >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0xac70 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0xac74 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0xac78 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0xac7c >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0xac80 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0xac84 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0xac88 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0xac8c >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x970c >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x9714 >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x9718 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x971c >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x31068 >> 2),
582         0x00000000,
583         (0x4e00 << 16) | (0x31068 >> 2),
584         0x00000000,
585         (0x5e00 << 16) | (0x31068 >> 2),
586         0x00000000,
587         (0x6e00 << 16) | (0x31068 >> 2),
588         0x00000000,
589         (0x7e00 << 16) | (0x31068 >> 2),
590         0x00000000,
591         (0x8e00 << 16) | (0x31068 >> 2),
592         0x00000000,
593         (0x9e00 << 16) | (0x31068 >> 2),
594         0x00000000,
595         (0xae00 << 16) | (0x31068 >> 2),
596         0x00000000,
597         (0xbe00 << 16) | (0x31068 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xcd10 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xcd14 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0x88b0 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x88b4 >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x88b8 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0x88bc >> 2),
610         0x00000000,
611         (0x0400 << 16) | (0x89c0 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x88c4 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0x88c8 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0x88d0 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0x88d4 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0x88d8 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0x8980 >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0x30938 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0x3093c >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0x30940 >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0x89a0 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0x30900 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0x30904 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x89b4 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x3c210 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x3c214 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x3c218 >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x8904 >> 2),
646         0x00000000,
647         0x5,
648         (0x0e00 << 16) | (0x8c28 >> 2),
649         (0x0e00 << 16) | (0x8c2c >> 2),
650         (0x0e00 << 16) | (0x8c30 >> 2),
651         (0x0e00 << 16) | (0x8c34 >> 2),
652         (0x0e00 << 16) | (0x9600 >> 2),
653 };
654
655 static const u32 kalindi_rlc_save_restore_register_list[] =
656 {
657         (0x0e00 << 16) | (0xc12c >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc140 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc150 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc15c >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc168 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc170 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc204 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc2b4 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc2b8 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc2bc >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc2c0 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x8228 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0x829c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x869c >> 2),
684         0x00000000,
685         (0x0600 << 16) | (0x98f4 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x98f8 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x9900 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc260 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x90e8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x3c000 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x3c00c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x8c1c >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x9700 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xcd20 >> 2),
704         0x00000000,
705         (0x4e00 << 16) | (0xcd20 >> 2),
706         0x00000000,
707         (0x5e00 << 16) | (0xcd20 >> 2),
708         0x00000000,
709         (0x6e00 << 16) | (0xcd20 >> 2),
710         0x00000000,
711         (0x7e00 << 16) | (0xcd20 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x89bc >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x8900 >> 2),
716         0x00000000,
717         0x3,
718         (0x0e00 << 16) | (0xc130 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc134 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc1fc >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc208 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc264 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc268 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc26c >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc270 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc274 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc28c >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc290 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc294 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0xc298 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0xc2a0 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0xc2a4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0xc2a8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc2ac >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x301d0 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x30238 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x30250 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x30254 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x30258 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x3025c >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xc900 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xc900 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xc900 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xc900 >> 2),
771         0x00000000,
772         (0x4e00 << 16) | (0xc904 >> 2),
773         0x00000000,
774         (0x5e00 << 16) | (0xc904 >> 2),
775         0x00000000,
776         (0x6e00 << 16) | (0xc904 >> 2),
777         0x00000000,
778         (0x7e00 << 16) | (0xc904 >> 2),
779         0x00000000,
780         (0x4e00 << 16) | (0xc908 >> 2),
781         0x00000000,
782         (0x5e00 << 16) | (0xc908 >> 2),
783         0x00000000,
784         (0x6e00 << 16) | (0xc908 >> 2),
785         0x00000000,
786         (0x7e00 << 16) | (0xc908 >> 2),
787         0x00000000,
788         (0x4e00 << 16) | (0xc90c >> 2),
789         0x00000000,
790         (0x5e00 << 16) | (0xc90c >> 2),
791         0x00000000,
792         (0x6e00 << 16) | (0xc90c >> 2),
793         0x00000000,
794         (0x7e00 << 16) | (0xc90c >> 2),
795         0x00000000,
796         (0x4e00 << 16) | (0xc910 >> 2),
797         0x00000000,
798         (0x5e00 << 16) | (0xc910 >> 2),
799         0x00000000,
800         (0x6e00 << 16) | (0xc910 >> 2),
801         0x00000000,
802         (0x7e00 << 16) | (0xc910 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc99c >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x9834 >> 2),
807         0x00000000,
808         (0x0000 << 16) | (0x30f00 >> 2),
809         0x00000000,
810         (0x0000 << 16) | (0x30f04 >> 2),
811         0x00000000,
812         (0x0000 << 16) | (0x30f08 >> 2),
813         0x00000000,
814         (0x0000 << 16) | (0x30f0c >> 2),
815         0x00000000,
816         (0x0600 << 16) | (0x9b7c >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x8a14 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x8a18 >> 2),
821         0x00000000,
822         (0x0600 << 16) | (0x30a00 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x8bf0 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x8bcc >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0x8b24 >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0x30a04 >> 2),
831         0x00000000,
832         (0x0600 << 16) | (0x30a10 >> 2),
833         0x00000000,
834         (0x0600 << 16) | (0x30a14 >> 2),
835         0x00000000,
836         (0x0600 << 16) | (0x30a18 >> 2),
837         0x00000000,
838         (0x0600 << 16) | (0x30a2c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0xc700 >> 2),
841         0x00000000,
842         (0x0e00 << 16) | (0xc704 >> 2),
843         0x00000000,
844         (0x0e00 << 16) | (0xc708 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xc768 >> 2),
847         0x00000000,
848         (0x0400 << 16) | (0xc770 >> 2),
849         0x00000000,
850         (0x0400 << 16) | (0xc774 >> 2),
851         0x00000000,
852         (0x0400 << 16) | (0xc798 >> 2),
853         0x00000000,
854         (0x0400 << 16) | (0xc79c >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x9100 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x3c010 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8c00 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8c04 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x8c20 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x8c38 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x8c3c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0xae00 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x9604 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xac08 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xac0c >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xac10 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xac14 >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0xac58 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0xac68 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0xac6c >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0xac70 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0xac74 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0xac78 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0xac7c >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0xac80 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0xac84 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0xac88 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0xac8c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x970c >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9714 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x9718 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x971c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x31068 >> 2),
913         0x00000000,
914         (0x4e00 << 16) | (0x31068 >> 2),
915         0x00000000,
916         (0x5e00 << 16) | (0x31068 >> 2),
917         0x00000000,
918         (0x6e00 << 16) | (0x31068 >> 2),
919         0x00000000,
920         (0x7e00 << 16) | (0x31068 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xcd10 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xcd14 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x88b0 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x88b4 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x88b8 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x88bc >> 2),
933         0x00000000,
934         (0x0400 << 16) | (0x89c0 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x88c4 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x88c8 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x88d0 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x88d4 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x88d8 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x8980 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0x30938 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0x3093c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0x30940 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0x89a0 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0x30900 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0x30904 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x89b4 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x3e1fc >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x3c210 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x3c214 >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x3c218 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x8904 >> 2),
971         0x00000000,
972         0x5,
973         (0x0e00 << 16) | (0x8c28 >> 2),
974         (0x0e00 << 16) | (0x8c2c >> 2),
975         (0x0e00 << 16) | (0x8c30 >> 2),
976         (0x0e00 << 16) | (0x8c34 >> 2),
977         (0x0e00 << 16) | (0x9600 >> 2),
978 };
979
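/*
 * "Golden" register tables: { offset, and_mask, or_mask } triplets applied
 * read-modify-write by radeon_program_register_sequence() from
 * cik_init_golden_registers() (an and_mask of 0xffffffff writes or_mask
 * directly).
 */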
980 static const u32 bonaire_golden_spm_registers[] =
981 {
982         0x30800, 0xe0ffffff, 0xe0000000
983 };
984
985 static const u32 bonaire_golden_common_registers[] =
986 {
987         0xc770, 0xffffffff, 0x00000800,
988         0xc774, 0xffffffff, 0x00000800,
989         0xc798, 0xffffffff, 0x00007fbf,
990         0xc79c, 0xffffffff, 0x00007faf
991 };
992
993 static const u32 bonaire_golden_registers[] =
994 {
995         0x3354, 0x00000333, 0x00000333,
996         0x3350, 0x000c0fc0, 0x00040200,
997         0x9a10, 0x00010000, 0x00058208,
998         0x3c000, 0xffff1fff, 0x00140000,
999         0x3c200, 0xfdfc0fff, 0x00000100,
1000         0x3c234, 0x40000000, 0x40000200,
1001         0x9830, 0xffffffff, 0x00000000,
1002         0x9834, 0xf00fffff, 0x00000400,
1003         0x9838, 0x0002021c, 0x00020200,
1004         0xc78, 0x00000080, 0x00000000,
1005         0x5bb0, 0x000000f0, 0x00000070,
1006         0x5bc0, 0xf0311fff, 0x80300000,
1007         0x98f8, 0x73773777, 0x12010001,
1008         0x350c, 0x00810000, 0x408af000,
1009         0x7030, 0x31000111, 0x00000011,
1010         0x2f48, 0x73773777, 0x12010001,
1011         0x220c, 0x00007fb6, 0x0021a1b1,
1012         0x2210, 0x00007fb6, 0x002021b1,
1013         0x2180, 0x00007fb6, 0x00002191,
1014         0x2218, 0x00007fb6, 0x002121b1,
1015         0x221c, 0x00007fb6, 0x002021b1,
1016         0x21dc, 0x00007fb6, 0x00002191,
1017         0x21e0, 0x00007fb6, 0x00002191,
1018         0x3628, 0x0000003f, 0x0000000a,
1019         0x362c, 0x0000003f, 0x0000000a,
1020         0x2ae4, 0x00073ffe, 0x000022a2,
1021         0x240c, 0x000007ff, 0x00000000,
1022         0x8a14, 0xf000003f, 0x00000007,
1023         0x8bf0, 0x00002001, 0x00000001,
1024         0x8b24, 0xffffffff, 0x00ffffff,
1025         0x30a04, 0x0000ff0f, 0x00000000,
1026         0x28a4c, 0x07ffffff, 0x06000000,
1027         0x4d8, 0x00000fff, 0x00000100,
1028         0x3e78, 0x00000001, 0x00000002,
1029         0x9100, 0x03000000, 0x0362c688,
1030         0x8c00, 0x000000ff, 0x00000001,
1031         0xe40, 0x00001fff, 0x00001fff,
1032         0x9060, 0x0000007f, 0x00000020,
1033         0x9508, 0x00010000, 0x00010000,
1034         0xac14, 0x000003ff, 0x000000f3,
1035         0xac0c, 0xffffffff, 0x00001032
1036 };
1037
1038 static const u32 bonaire_mgcg_cgcg_init[] =
1039 {
1040         0xc420, 0xffffffff, 0xfffffffc,
1041         0x30800, 0xffffffff, 0xe0000000,
1042         0x3c2a0, 0xffffffff, 0x00000100,
1043         0x3c208, 0xffffffff, 0x00000100,
1044         0x3c2c0, 0xffffffff, 0xc0000100,
1045         0x3c2c8, 0xffffffff, 0xc0000100,
1046         0x3c2c4, 0xffffffff, 0xc0000100,
1047         0x55e4, 0xffffffff, 0x00600100,
1048         0x3c280, 0xffffffff, 0x00000100,
1049         0x3c214, 0xffffffff, 0x06000100,
1050         0x3c220, 0xffffffff, 0x00000100,
1051         0x3c218, 0xffffffff, 0x06000100,
1052         0x3c204, 0xffffffff, 0x00000100,
1053         0x3c2e0, 0xffffffff, 0x00000100,
1054         0x3c224, 0xffffffff, 0x00000100,
1055         0x3c200, 0xffffffff, 0x00000100,
1056         0x3c230, 0xffffffff, 0x00000100,
1057         0x3c234, 0xffffffff, 0x00000100,
1058         0x3c250, 0xffffffff, 0x00000100,
1059         0x3c254, 0xffffffff, 0x00000100,
1060         0x3c258, 0xffffffff, 0x00000100,
1061         0x3c25c, 0xffffffff, 0x00000100,
1062         0x3c260, 0xffffffff, 0x00000100,
1063         0x3c27c, 0xffffffff, 0x00000100,
1064         0x3c278, 0xffffffff, 0x00000100,
1065         0x3c210, 0xffffffff, 0x06000100,
1066         0x3c290, 0xffffffff, 0x00000100,
1067         0x3c274, 0xffffffff, 0x00000100,
1068         0x3c2b4, 0xffffffff, 0x00000100,
1069         0x3c2b0, 0xffffffff, 0x00000100,
1070         0x3c270, 0xffffffff, 0x00000100,
1071         0x30800, 0xffffffff, 0xe0000000,
1072         0x3c020, 0xffffffff, 0x00010000,
1073         0x3c024, 0xffffffff, 0x00030002,
1074         0x3c028, 0xffffffff, 0x00040007,
1075         0x3c02c, 0xffffffff, 0x00060005,
1076         0x3c030, 0xffffffff, 0x00090008,
1077         0x3c034, 0xffffffff, 0x00010000,
1078         0x3c038, 0xffffffff, 0x00030002,
1079         0x3c03c, 0xffffffff, 0x00040007,
1080         0x3c040, 0xffffffff, 0x00060005,
1081         0x3c044, 0xffffffff, 0x00090008,
1082         0x3c048, 0xffffffff, 0x00010000,
1083         0x3c04c, 0xffffffff, 0x00030002,
1084         0x3c050, 0xffffffff, 0x00040007,
1085         0x3c054, 0xffffffff, 0x00060005,
1086         0x3c058, 0xffffffff, 0x00090008,
1087         0x3c05c, 0xffffffff, 0x00010000,
1088         0x3c060, 0xffffffff, 0x00030002,
1089         0x3c064, 0xffffffff, 0x00040007,
1090         0x3c068, 0xffffffff, 0x00060005,
1091         0x3c06c, 0xffffffff, 0x00090008,
1092         0x3c070, 0xffffffff, 0x00010000,
1093         0x3c074, 0xffffffff, 0x00030002,
1094         0x3c078, 0xffffffff, 0x00040007,
1095         0x3c07c, 0xffffffff, 0x00060005,
1096         0x3c080, 0xffffffff, 0x00090008,
1097         0x3c084, 0xffffffff, 0x00010000,
1098         0x3c088, 0xffffffff, 0x00030002,
1099         0x3c08c, 0xffffffff, 0x00040007,
1100         0x3c090, 0xffffffff, 0x00060005,
1101         0x3c094, 0xffffffff, 0x00090008,
1102         0x3c098, 0xffffffff, 0x00010000,
1103         0x3c09c, 0xffffffff, 0x00030002,
1104         0x3c0a0, 0xffffffff, 0x00040007,
1105         0x3c0a4, 0xffffffff, 0x00060005,
1106         0x3c0a8, 0xffffffff, 0x00090008,
1107         0x3c000, 0xffffffff, 0x96e00200,
1108         0x8708, 0xffffffff, 0x00900100,
1109         0xc424, 0xffffffff, 0x0020003f,
1110         0x38, 0xffffffff, 0x0140001c,
1111         0x3c, 0x000f0000, 0x000f0000,
1112         0x220, 0xffffffff, 0xC060000C,
1113         0x224, 0xc0000fff, 0x00000100,
1114         0xf90, 0xffffffff, 0x00000100,
1115         0xf98, 0x00000101, 0x00000000,
1116         0x20a8, 0xffffffff, 0x00000104,
1117         0x55e4, 0xff000fff, 0x00000100,
1118         0x30cc, 0xc0000fff, 0x00000104,
1119         0xc1e4, 0x00000001, 0x00000001,
1120         0xd00c, 0xff000ff0, 0x00000100,
1121         0xd80c, 0xff000ff0, 0x00000100
1122 };
1123
1124 static const u32 spectre_golden_spm_registers[] =
1125 {
1126         0x30800, 0xe0ffffff, 0xe0000000
1127 };
1128
1129 static const u32 spectre_golden_common_registers[] =
1130 {
1131         0xc770, 0xffffffff, 0x00000800,
1132         0xc774, 0xffffffff, 0x00000800,
1133         0xc798, 0xffffffff, 0x00007fbf,
1134         0xc79c, 0xffffffff, 0x00007faf
1135 };
1136
1137 static const u32 spectre_golden_registers[] =
1138 {
1139         0x3c000, 0xffff1fff, 0x96940200,
1140         0x3c00c, 0xffff0001, 0xff000000,
1141         0x3c200, 0xfffc0fff, 0x00000100,
1142         0x6ed8, 0x00010101, 0x00010000,
1143         0x9834, 0xf00fffff, 0x00000400,
1144         0x9838, 0xfffffffc, 0x00020200,
1145         0x5bb0, 0x000000f0, 0x00000070,
1146         0x5bc0, 0xf0311fff, 0x80300000,
1147         0x98f8, 0x73773777, 0x12010001,
1148         0x9b7c, 0x00ff0000, 0x00fc0000,
1149         0x2f48, 0x73773777, 0x12010001,
1150         0x8a14, 0xf000003f, 0x00000007,
1151         0x8b24, 0xffffffff, 0x00ffffff,
1152         0x28350, 0x3f3f3fff, 0x00000082,
1153         0x28354, 0x0000003f, 0x00000000,
1154         0x3e78, 0x00000001, 0x00000002,
1155         0x913c, 0xffff03df, 0x00000004,
1156         0xc768, 0x00000008, 0x00000008,
1157         0x8c00, 0x000008ff, 0x00000800,
1158         0x9508, 0x00010000, 0x00010000,
1159         0xac0c, 0xffffffff, 0x54763210,
1160         0x214f8, 0x01ff01ff, 0x00000002,
1161         0x21498, 0x007ff800, 0x00200000,
1162         0x2015c, 0xffffffff, 0x00000f40,
1163         0x30934, 0xffffffff, 0x00000001
1164 };
1165
1166 static const u32 spectre_mgcg_cgcg_init[] =
1167 {
1168         0xc420, 0xffffffff, 0xfffffffc,
1169         0x30800, 0xffffffff, 0xe0000000,
1170         0x3c2a0, 0xffffffff, 0x00000100,
1171         0x3c208, 0xffffffff, 0x00000100,
1172         0x3c2c0, 0xffffffff, 0x00000100,
1173         0x3c2c8, 0xffffffff, 0x00000100,
1174         0x3c2c4, 0xffffffff, 0x00000100,
1175         0x55e4, 0xffffffff, 0x00600100,
1176         0x3c280, 0xffffffff, 0x00000100,
1177         0x3c214, 0xffffffff, 0x06000100,
1178         0x3c220, 0xffffffff, 0x00000100,
1179         0x3c218, 0xffffffff, 0x06000100,
1180         0x3c204, 0xffffffff, 0x00000100,
1181         0x3c2e0, 0xffffffff, 0x00000100,
1182         0x3c224, 0xffffffff, 0x00000100,
1183         0x3c200, 0xffffffff, 0x00000100,
1184         0x3c230, 0xffffffff, 0x00000100,
1185         0x3c234, 0xffffffff, 0x00000100,
1186         0x3c250, 0xffffffff, 0x00000100,
1187         0x3c254, 0xffffffff, 0x00000100,
1188         0x3c258, 0xffffffff, 0x00000100,
1189         0x3c25c, 0xffffffff, 0x00000100,
1190         0x3c260, 0xffffffff, 0x00000100,
1191         0x3c27c, 0xffffffff, 0x00000100,
1192         0x3c278, 0xffffffff, 0x00000100,
1193         0x3c210, 0xffffffff, 0x06000100,
1194         0x3c290, 0xffffffff, 0x00000100,
1195         0x3c274, 0xffffffff, 0x00000100,
1196         0x3c2b4, 0xffffffff, 0x00000100,
1197         0x3c2b0, 0xffffffff, 0x00000100,
1198         0x3c270, 0xffffffff, 0x00000100,
1199         0x30800, 0xffffffff, 0xe0000000,
1200         0x3c020, 0xffffffff, 0x00010000,
1201         0x3c024, 0xffffffff, 0x00030002,
1202         0x3c028, 0xffffffff, 0x00040007,
1203         0x3c02c, 0xffffffff, 0x00060005,
1204         0x3c030, 0xffffffff, 0x00090008,
1205         0x3c034, 0xffffffff, 0x00010000,
1206         0x3c038, 0xffffffff, 0x00030002,
1207         0x3c03c, 0xffffffff, 0x00040007,
1208         0x3c040, 0xffffffff, 0x00060005,
1209         0x3c044, 0xffffffff, 0x00090008,
1210         0x3c048, 0xffffffff, 0x00010000,
1211         0x3c04c, 0xffffffff, 0x00030002,
1212         0x3c050, 0xffffffff, 0x00040007,
1213         0x3c054, 0xffffffff, 0x00060005,
1214         0x3c058, 0xffffffff, 0x00090008,
1215         0x3c05c, 0xffffffff, 0x00010000,
1216         0x3c060, 0xffffffff, 0x00030002,
1217         0x3c064, 0xffffffff, 0x00040007,
1218         0x3c068, 0xffffffff, 0x00060005,
1219         0x3c06c, 0xffffffff, 0x00090008,
1220         0x3c070, 0xffffffff, 0x00010000,
1221         0x3c074, 0xffffffff, 0x00030002,
1222         0x3c078, 0xffffffff, 0x00040007,
1223         0x3c07c, 0xffffffff, 0x00060005,
1224         0x3c080, 0xffffffff, 0x00090008,
1225         0x3c084, 0xffffffff, 0x00010000,
1226         0x3c088, 0xffffffff, 0x00030002,
1227         0x3c08c, 0xffffffff, 0x00040007,
1228         0x3c090, 0xffffffff, 0x00060005,
1229         0x3c094, 0xffffffff, 0x00090008,
1230         0x3c098, 0xffffffff, 0x00010000,
1231         0x3c09c, 0xffffffff, 0x00030002,
1232         0x3c0a0, 0xffffffff, 0x00040007,
1233         0x3c0a4, 0xffffffff, 0x00060005,
1234         0x3c0a8, 0xffffffff, 0x00090008,
1235         0x3c0ac, 0xffffffff, 0x00010000,
1236         0x3c0b0, 0xffffffff, 0x00030002,
1237         0x3c0b4, 0xffffffff, 0x00040007,
1238         0x3c0b8, 0xffffffff, 0x00060005,
1239         0x3c0bc, 0xffffffff, 0x00090008,
1240         0x3c000, 0xffffffff, 0x96e00200,
1241         0x8708, 0xffffffff, 0x00900100,
1242         0xc424, 0xffffffff, 0x0020003f,
1243         0x38, 0xffffffff, 0x0140001c,
1244         0x3c, 0x000f0000, 0x000f0000,
1245         0x220, 0xffffffff, 0xC060000C,
1246         0x224, 0xc0000fff, 0x00000100,
1247         0xf90, 0xffffffff, 0x00000100,
1248         0xf98, 0x00000101, 0x00000000,
1249         0x20a8, 0xffffffff, 0x00000104,
1250         0x55e4, 0xff000fff, 0x00000100,
1251         0x30cc, 0xc0000fff, 0x00000104,
1252         0xc1e4, 0x00000001, 0x00000001,
1253         0xd00c, 0xff000ff0, 0x00000100,
1254         0xd80c, 0xff000ff0, 0x00000100
1255 };
1256
1257 static const u32 kalindi_golden_spm_registers[] =
1258 {
1259         0x30800, 0xe0ffffff, 0xe0000000
1260 };
1261
1262 static const u32 kalindi_golden_common_registers[] =
1263 {
1264         0xc770, 0xffffffff, 0x00000800,
1265         0xc774, 0xffffffff, 0x00000800,
1266         0xc798, 0xffffffff, 0x00007fbf,
1267         0xc79c, 0xffffffff, 0x00007faf
1268 };
1269
1270 static const u32 kalindi_golden_registers[] =
1271 {
1272         0x3c000, 0xffffdfff, 0x6e944040,
1273         0x55e4, 0xff607fff, 0xfc000100,
1274         0x3c220, 0xff000fff, 0x00000100,
1275         0x3c224, 0xff000fff, 0x00000100,
1276         0x3c200, 0xfffc0fff, 0x00000100,
1277         0x6ed8, 0x00010101, 0x00010000,
1278         0x9830, 0xffffffff, 0x00000000,
1279         0x9834, 0xf00fffff, 0x00000400,
1280         0x5bb0, 0x000000f0, 0x00000070,
1281         0x5bc0, 0xf0311fff, 0x80300000,
1282         0x98f8, 0x73773777, 0x12010001,
1283         0x98fc, 0xffffffff, 0x00000010,
1284         0x9b7c, 0x00ff0000, 0x00fc0000,
1285         0x8030, 0x00001f0f, 0x0000100a,
1286         0x2f48, 0x73773777, 0x12010001,
1287         0x2408, 0x000fffff, 0x000c007f,
1288         0x8a14, 0xf000003f, 0x00000007,
1289         0x8b24, 0x3fff3fff, 0x00ffcfff,
1290         0x30a04, 0x0000ff0f, 0x00000000,
1291         0x28a4c, 0x07ffffff, 0x06000000,
1292         0x4d8, 0x00000fff, 0x00000100,
1293         0x3e78, 0x00000001, 0x00000002,
1294         0xc768, 0x00000008, 0x00000008,
1295         0x8c00, 0x000000ff, 0x00000003,
1296         0x214f8, 0x01ff01ff, 0x00000002,
1297         0x21498, 0x007ff800, 0x00200000,
1298         0x2015c, 0xffffffff, 0x00000f40,
1299         0x88c4, 0x001f3ae3, 0x00000082,
1300         0x88d4, 0x0000001f, 0x00000010,
1301         0x30934, 0xffffffff, 0x00000000
1302 };
1303
1304 static const u32 kalindi_mgcg_cgcg_init[] =
1305 {
1306         0xc420, 0xffffffff, 0xfffffffc,
1307         0x30800, 0xffffffff, 0xe0000000,
1308         0x3c2a0, 0xffffffff, 0x00000100,
1309         0x3c208, 0xffffffff, 0x00000100,
1310         0x3c2c0, 0xffffffff, 0x00000100,
1311         0x3c2c8, 0xffffffff, 0x00000100,
1312         0x3c2c4, 0xffffffff, 0x00000100,
1313         0x55e4, 0xffffffff, 0x00600100,
1314         0x3c280, 0xffffffff, 0x00000100,
1315         0x3c214, 0xffffffff, 0x06000100,
1316         0x3c220, 0xffffffff, 0x00000100,
1317         0x3c218, 0xffffffff, 0x06000100,
1318         0x3c204, 0xffffffff, 0x00000100,
1319         0x3c2e0, 0xffffffff, 0x00000100,
1320         0x3c224, 0xffffffff, 0x00000100,
1321         0x3c200, 0xffffffff, 0x00000100,
1322         0x3c230, 0xffffffff, 0x00000100,
1323         0x3c234, 0xffffffff, 0x00000100,
1324         0x3c250, 0xffffffff, 0x00000100,
1325         0x3c254, 0xffffffff, 0x00000100,
1326         0x3c258, 0xffffffff, 0x00000100,
1327         0x3c25c, 0xffffffff, 0x00000100,
1328         0x3c260, 0xffffffff, 0x00000100,
1329         0x3c27c, 0xffffffff, 0x00000100,
1330         0x3c278, 0xffffffff, 0x00000100,
1331         0x3c210, 0xffffffff, 0x06000100,
1332         0x3c290, 0xffffffff, 0x00000100,
1333         0x3c274, 0xffffffff, 0x00000100,
1334         0x3c2b4, 0xffffffff, 0x00000100,
1335         0x3c2b0, 0xffffffff, 0x00000100,
1336         0x3c270, 0xffffffff, 0x00000100,
1337         0x30800, 0xffffffff, 0xe0000000,
1338         0x3c020, 0xffffffff, 0x00010000,
1339         0x3c024, 0xffffffff, 0x00030002,
1340         0x3c028, 0xffffffff, 0x00040007,
1341         0x3c02c, 0xffffffff, 0x00060005,
1342         0x3c030, 0xffffffff, 0x00090008,
1343         0x3c034, 0xffffffff, 0x00010000,
1344         0x3c038, 0xffffffff, 0x00030002,
1345         0x3c03c, 0xffffffff, 0x00040007,
1346         0x3c040, 0xffffffff, 0x00060005,
1347         0x3c044, 0xffffffff, 0x00090008,
1348         0x3c000, 0xffffffff, 0x96e00200,
1349         0x8708, 0xffffffff, 0x00900100,
1350         0xc424, 0xffffffff, 0x0020003f,
1351         0x38, 0xffffffff, 0x0140001c,
1352         0x3c, 0x000f0000, 0x000f0000,
1353         0x220, 0xffffffff, 0xC060000C,
1354         0x224, 0xc0000fff, 0x00000100,
1355         0x20a8, 0xffffffff, 0x00000104,
1356         0x55e4, 0xff000fff, 0x00000100,
1357         0x30cc, 0xc0000fff, 0x00000104,
1358         0xc1e4, 0x00000001, 0x00000001,
1359         0xd00c, 0xff000ff0, 0x00000100,
1360         0xd80c, 0xff000ff0, 0x00000100
1361 };
1362
1363 static const u32 hawaii_golden_spm_registers[] =
1364 {
1365         0x30800, 0xe0ffffff, 0xe0000000
1366 };
1367
1368 static const u32 hawaii_golden_common_registers[] =
1369 {
1370         0x30800, 0xffffffff, 0xe0000000,
1371         0x28350, 0xffffffff, 0x3a00161a,
1372         0x28354, 0xffffffff, 0x0000002e,
1373         0x9a10, 0xffffffff, 0x00018208,
1374         0x98f8, 0xffffffff, 0x12011003
1375 };
1376
1377 static const u32 hawaii_golden_registers[] =
1378 {
1379         0x3354, 0x00000333, 0x00000333,
1380         0x9a10, 0x00010000, 0x00058208,
1381         0x9830, 0xffffffff, 0x00000000,
1382         0x9834, 0xf00fffff, 0x00000400,
1383         0x9838, 0x0002021c, 0x00020200,
1384         0xc78, 0x00000080, 0x00000000,
1385         0x5bb0, 0x000000f0, 0x00000070,
1386         0x5bc0, 0xf0311fff, 0x80300000,
1387         0x350c, 0x00810000, 0x408af000,
1388         0x7030, 0x31000111, 0x00000011,
1389         0x2f48, 0x73773777, 0x12010001,
1390         0x2120, 0x0000007f, 0x0000001b,
1391         0x21dc, 0x00007fb6, 0x00002191,
1392         0x3628, 0x0000003f, 0x0000000a,
1393         0x362c, 0x0000003f, 0x0000000a,
1394         0x2ae4, 0x00073ffe, 0x000022a2,
1395         0x240c, 0x000007ff, 0x00000000,
1396         0x8bf0, 0x00002001, 0x00000001,
1397         0x8b24, 0xffffffff, 0x00ffffff,
1398         0x30a04, 0x0000ff0f, 0x00000000,
1399         0x28a4c, 0x07ffffff, 0x06000000,
1400         0x3e78, 0x00000001, 0x00000002,
1401         0xc768, 0x00000008, 0x00000008,
1402         0xc770, 0x00000f00, 0x00000800,
1403         0xc774, 0x00000f00, 0x00000800,
1404         0xc798, 0x00ffffff, 0x00ff7fbf,
1405         0xc79c, 0x00ffffff, 0x00ff7faf,
1406         0x8c00, 0x000000ff, 0x00000800,
1407         0xe40, 0x00001fff, 0x00001fff,
1408         0x9060, 0x0000007f, 0x00000020,
1409         0x9508, 0x00010000, 0x00010000,
1410         0xae00, 0x00100000, 0x000ff07c,
1411         0xac14, 0x000003ff, 0x0000000f,
1412         0xac10, 0xffffffff, 0x7564fdec,
1413         0xac0c, 0xffffffff, 0x3120b9a8,
1414         0xac08, 0x20000000, 0x0f9c0000
1415 };
1416
1417 static const u32 hawaii_mgcg_cgcg_init[] =
1418 {
1419         0xc420, 0xffffffff, 0xfffffffd,
1420         0x30800, 0xffffffff, 0xe0000000,
1421         0x3c2a0, 0xffffffff, 0x00000100,
1422         0x3c208, 0xffffffff, 0x00000100,
1423         0x3c2c0, 0xffffffff, 0x00000100,
1424         0x3c2c8, 0xffffffff, 0x00000100,
1425         0x3c2c4, 0xffffffff, 0x00000100,
1426         0x55e4, 0xffffffff, 0x00200100,
1427         0x3c280, 0xffffffff, 0x00000100,
1428         0x3c214, 0xffffffff, 0x06000100,
1429         0x3c220, 0xffffffff, 0x00000100,
1430         0x3c218, 0xffffffff, 0x06000100,
1431         0x3c204, 0xffffffff, 0x00000100,
1432         0x3c2e0, 0xffffffff, 0x00000100,
1433         0x3c224, 0xffffffff, 0x00000100,
1434         0x3c200, 0xffffffff, 0x00000100,
1435         0x3c230, 0xffffffff, 0x00000100,
1436         0x3c234, 0xffffffff, 0x00000100,
1437         0x3c250, 0xffffffff, 0x00000100,
1438         0x3c254, 0xffffffff, 0x00000100,
1439         0x3c258, 0xffffffff, 0x00000100,
1440         0x3c25c, 0xffffffff, 0x00000100,
1441         0x3c260, 0xffffffff, 0x00000100,
1442         0x3c27c, 0xffffffff, 0x00000100,
1443         0x3c278, 0xffffffff, 0x00000100,
1444         0x3c210, 0xffffffff, 0x06000100,
1445         0x3c290, 0xffffffff, 0x00000100,
1446         0x3c274, 0xffffffff, 0x00000100,
1447         0x3c2b4, 0xffffffff, 0x00000100,
1448         0x3c2b0, 0xffffffff, 0x00000100,
1449         0x3c270, 0xffffffff, 0x00000100,
1450         0x30800, 0xffffffff, 0xe0000000,
1451         0x3c020, 0xffffffff, 0x00010000,
1452         0x3c024, 0xffffffff, 0x00030002,
1453         0x3c028, 0xffffffff, 0x00040007,
1454         0x3c02c, 0xffffffff, 0x00060005,
1455         0x3c030, 0xffffffff, 0x00090008,
1456         0x3c034, 0xffffffff, 0x00010000,
1457         0x3c038, 0xffffffff, 0x00030002,
1458         0x3c03c, 0xffffffff, 0x00040007,
1459         0x3c040, 0xffffffff, 0x00060005,
1460         0x3c044, 0xffffffff, 0x00090008,
1461         0x3c048, 0xffffffff, 0x00010000,
1462         0x3c04c, 0xffffffff, 0x00030002,
1463         0x3c050, 0xffffffff, 0x00040007,
1464         0x3c054, 0xffffffff, 0x00060005,
1465         0x3c058, 0xffffffff, 0x00090008,
1466         0x3c05c, 0xffffffff, 0x00010000,
1467         0x3c060, 0xffffffff, 0x00030002,
1468         0x3c064, 0xffffffff, 0x00040007,
1469         0x3c068, 0xffffffff, 0x00060005,
1470         0x3c06c, 0xffffffff, 0x00090008,
1471         0x3c070, 0xffffffff, 0x00010000,
1472         0x3c074, 0xffffffff, 0x00030002,
1473         0x3c078, 0xffffffff, 0x00040007,
1474         0x3c07c, 0xffffffff, 0x00060005,
1475         0x3c080, 0xffffffff, 0x00090008,
1476         0x3c084, 0xffffffff, 0x00010000,
1477         0x3c088, 0xffffffff, 0x00030002,
1478         0x3c08c, 0xffffffff, 0x00040007,
1479         0x3c090, 0xffffffff, 0x00060005,
1480         0x3c094, 0xffffffff, 0x00090008,
1481         0x3c098, 0xffffffff, 0x00010000,
1482         0x3c09c, 0xffffffff, 0x00030002,
1483         0x3c0a0, 0xffffffff, 0x00040007,
1484         0x3c0a4, 0xffffffff, 0x00060005,
1485         0x3c0a8, 0xffffffff, 0x00090008,
1486         0x3c0ac, 0xffffffff, 0x00010000,
1487         0x3c0b0, 0xffffffff, 0x00030002,
1488         0x3c0b4, 0xffffffff, 0x00040007,
1489         0x3c0b8, 0xffffffff, 0x00060005,
1490         0x3c0bc, 0xffffffff, 0x00090008,
1491         0x3c0c0, 0xffffffff, 0x00010000,
1492         0x3c0c4, 0xffffffff, 0x00030002,
1493         0x3c0c8, 0xffffffff, 0x00040007,
1494         0x3c0cc, 0xffffffff, 0x00060005,
1495         0x3c0d0, 0xffffffff, 0x00090008,
1496         0x3c0d4, 0xffffffff, 0x00010000,
1497         0x3c0d8, 0xffffffff, 0x00030002,
1498         0x3c0dc, 0xffffffff, 0x00040007,
1499         0x3c0e0, 0xffffffff, 0x00060005,
1500         0x3c0e4, 0xffffffff, 0x00090008,
1501         0x3c0e8, 0xffffffff, 0x00010000,
1502         0x3c0ec, 0xffffffff, 0x00030002,
1503         0x3c0f0, 0xffffffff, 0x00040007,
1504         0x3c0f4, 0xffffffff, 0x00060005,
1505         0x3c0f8, 0xffffffff, 0x00090008,
1506         0xc318, 0xffffffff, 0x00020200,
1507         0x3350, 0xffffffff, 0x00000200,
1508         0x15c0, 0xffffffff, 0x00000400,
1509         0x55e8, 0xffffffff, 0x00000000,
1510         0x2f50, 0xffffffff, 0x00000902,
1511         0x3c000, 0xffffffff, 0x96940200,
1512         0x8708, 0xffffffff, 0x00900100,
1513         0xc424, 0xffffffff, 0x0020003f,
1514         0x38, 0xffffffff, 0x0140001c,
1515         0x3c, 0x000f0000, 0x000f0000,
1516         0x220, 0xffffffff, 0xc060000c,
1517         0x224, 0xc0000fff, 0x00000100,
1518         0xf90, 0xffffffff, 0x00000100,
1519         0xf98, 0x00000101, 0x00000000,
1520         0x20a8, 0xffffffff, 0x00000104,
1521         0x55e4, 0xff000fff, 0x00000100,
1522         0x30cc, 0xc0000fff, 0x00000104,
1523         0xc1e4, 0x00000001, 0x00000001,
1524         0xd00c, 0xff000ff0, 0x00000100,
1525         0xd80c, 0xff000ff0, 0x00000100
1526 };
1527
1528 static const u32 godavari_golden_registers[] =
1529 {
1530         0x55e4, 0xff607fff, 0xfc000100,
1531         0x6ed8, 0x00010101, 0x00010000,
1532         0x9830, 0xffffffff, 0x00000000,
1533         0x98302, 0xf00fffff, 0x00000400,
1534         0x6130, 0xffffffff, 0x00010000,
1535         0x5bb0, 0x000000f0, 0x00000070,
1536         0x5bc0, 0xf0311fff, 0x80300000,
1537         0x98f8, 0x73773777, 0x12010001,
1538         0x98fc, 0xffffffff, 0x00000010,
1539         0x8030, 0x00001f0f, 0x0000100a,
1540         0x2f48, 0x73773777, 0x12010001,
1541         0x2408, 0x000fffff, 0x000c007f,
1542         0x8a14, 0xf000003f, 0x00000007,
1543         0x8b24, 0xffffffff, 0x00ff0fff,
1544         0x30a04, 0x0000ff0f, 0x00000000,
1545         0x28a4c, 0x07ffffff, 0x06000000,
1546         0x4d8, 0x00000fff, 0x00000100,
1547         0xd014, 0x00010000, 0x00810001,
1548         0xd814, 0x00010000, 0x00810001,
1549         0x3e78, 0x00000001, 0x00000002,
1550         0xc768, 0x00000008, 0x00000008,
1551         0xc770, 0x00000f00, 0x00000800,
1552         0xc774, 0x00000f00, 0x00000800,
1553         0xc798, 0x00ffffff, 0x00ff7fbf,
1554         0xc79c, 0x00ffffff, 0x00ff7faf,
1555         0x8c00, 0x000000ff, 0x00000001,
1556         0x214f8, 0x01ff01ff, 0x00000002,
1557         0x21498, 0x007ff800, 0x00200000,
1558         0x2015c, 0xffffffff, 0x00000f40,
1559         0x88c4, 0x001f3ae3, 0x00000082,
1560         0x88d4, 0x0000001f, 0x00000010,
1561         0x30934, 0xffffffff, 0x00000000
1562 };
1563
1564
1565 static void cik_init_golden_registers(struct radeon_device *rdev)
1566 {
1567         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1568         mutex_lock(&rdev->grbm_idx_mutex);
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 radeon_program_register_sequence(rdev,
1572                                                  bonaire_mgcg_cgcg_init,
1573                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1574                 radeon_program_register_sequence(rdev,
1575                                                  bonaire_golden_registers,
1576                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  bonaire_golden_common_registers,
1579                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1580                 radeon_program_register_sequence(rdev,
1581                                                  bonaire_golden_spm_registers,
1582                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1583                 break;
1584         case CHIP_KABINI:
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_mgcg_cgcg_init,
1587                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_common_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1594                 radeon_program_register_sequence(rdev,
1595                                                  kalindi_golden_spm_registers,
1596                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1597                 break;
1598         case CHIP_MULLINS:
1599                 radeon_program_register_sequence(rdev,
1600                                                  kalindi_mgcg_cgcg_init,
1601                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1602                 radeon_program_register_sequence(rdev,
1603                                                  godavari_golden_registers,
1604                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  kalindi_golden_common_registers,
1607                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1608                 radeon_program_register_sequence(rdev,
1609                                                  kalindi_golden_spm_registers,
1610                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1611                 break;
1612         case CHIP_KAVERI:
1613                 radeon_program_register_sequence(rdev,
1614                                                  spectre_mgcg_cgcg_init,
1615                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1616                 radeon_program_register_sequence(rdev,
1617                                                  spectre_golden_registers,
1618                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  spectre_golden_common_registers,
1621                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1622                 radeon_program_register_sequence(rdev,
1623                                                  spectre_golden_spm_registers,
1624                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1625                 break;
1626         case CHIP_HAWAII:
1627                 radeon_program_register_sequence(rdev,
1628                                                  hawaii_mgcg_cgcg_init,
1629                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1630                 radeon_program_register_sequence(rdev,
1631                                                  hawaii_golden_registers,
1632                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  hawaii_golden_common_registers,
1635                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  hawaii_golden_spm_registers,
1638                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1639                 break;
1640         default:
1641                 break;
1642         }
1643         mutex_unlock(&rdev->grbm_idx_mutex);
1644 }
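
/*
 * Illustration only: the golden register tables above are flat arrays of
 * (offset, and_mask, or_value) triplets.  radeon_program_register_sequence()
 * (defined in radeon_device.c) walks such a table roughly as sketched
 * below, doing a read-modify-write unless the mask covers the whole
 * register.  The helper name and details here are an editorial sketch,
 * not the driver's actual implementation.
 */
static void cik_sketch_program_triplets(struct radeon_device *rdev,
					const u32 *regs, u32 array_size)
{
	u32 i, reg, and_mask, or_val, tmp;

	for (i = 0; i + 2 < array_size; i += 3) {
		reg = regs[i];
		and_mask = regs[i + 1];
		or_val = regs[i + 2];
		if (and_mask == 0xffffffff) {
			tmp = or_val;		/* full overwrite */
		} else {
			tmp = RREG32(reg);	/* preserve bits outside the mask */
			tmp &= ~and_mask;
			tmp |= or_val;
		}
		WREG32(reg, tmp);
	}
}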
1645
1646 /**
1647  * cik_get_xclk - get the xclk
1648  *
1649  * @rdev: radeon_device pointer
1650  *
1651  * Returns the reference clock used by the gfx engine
1652  * (CIK).
1653  */
1654 u32 cik_get_xclk(struct radeon_device *rdev)
1655 {
1656         u32 reference_clock = rdev->clock.spll.reference_freq;
1657
1658         if (rdev->flags & RADEON_IS_IGP) {
1659                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660                         return reference_clock / 2;
1661         } else {
1662                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663                         return reference_clock / 4;
1664         }
1665         return reference_clock;
1666 }
1667
1668 /**
1669  * cik_mm_rdoorbell - read a doorbell dword
1670  *
1671  * @rdev: radeon_device pointer
1672  * @index: doorbell index
1673  *
1674  * Returns the value in the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 return readl(rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683                 return 0;
1684         }
1685 }
1686
1687 /**
1688  * cik_mm_wdoorbell - write a doorbell dword
1689  *
1690  * @rdev: radeon_device pointer
1691  * @index: doorbell index
1692  * @v: value to write
1693  *
1694  * Writes @v to the doorbell aperture at the
1695  * requested doorbell index (CIK).
1696  */
1697 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698 {
1699         if (index < rdev->doorbell.num_doorbells) {
1700                 writel(v, rdev->doorbell.ptr + index);
1701         } else {
1702                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703         }
1704 }
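
/*
 * radeon.h normally wraps these accessors in RDOORBELL32()/WDOORBELL32()
 * macros.  The function below is only a sketch of how a ring would
 * publish its write pointer through a doorbell; the real ring callbacks
 * appear later in this file and the field names used here are assumed,
 * not quoted from them.
 */
static void cik_sketch_ring_doorbell(struct radeon_device *rdev,
				     struct radeon_ring *ring)
{
	/* tell the hw about the new write pointer ... */
	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
	/* ... and read it back, e.g. for debugging */
	DRM_DEBUG("doorbell %u reads back 0x%08x\n", ring->doorbell_index,
		  cik_mm_rdoorbell(rdev, ring->doorbell_index));
}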
1705
1706 #define BONAIRE_IO_MC_REGS_SIZE 36
1707
1708 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1709 {
1710         {0x00000070, 0x04400000},
1711         {0x00000071, 0x80c01803},
1712         {0x00000072, 0x00004004},
1713         {0x00000073, 0x00000100},
1714         {0x00000074, 0x00ff0000},
1715         {0x00000075, 0x34000000},
1716         {0x00000076, 0x08000014},
1717         {0x00000077, 0x00cc08ec},
1718         {0x00000078, 0x00000400},
1719         {0x00000079, 0x00000000},
1720         {0x0000007a, 0x04090000},
1721         {0x0000007c, 0x00000000},
1722         {0x0000007e, 0x4408a8e8},
1723         {0x0000007f, 0x00000304},
1724         {0x00000080, 0x00000000},
1725         {0x00000082, 0x00000001},
1726         {0x00000083, 0x00000002},
1727         {0x00000084, 0xf3e4f400},
1728         {0x00000085, 0x052024e3},
1729         {0x00000087, 0x00000000},
1730         {0x00000088, 0x01000000},
1731         {0x0000008a, 0x1c0a0000},
1732         {0x0000008b, 0xff010000},
1733         {0x0000008d, 0xffffefff},
1734         {0x0000008e, 0xfff3efff},
1735         {0x0000008f, 0xfff3efbf},
1736         {0x00000092, 0xf7ffffff},
1737         {0x00000093, 0xffffff7f},
1738         {0x00000095, 0x00101101},
1739         {0x00000096, 0x00000fff},
1740         {0x00000097, 0x00116fff},
1741         {0x00000098, 0x60010000},
1742         {0x00000099, 0x10010000},
1743         {0x0000009a, 0x00006000},
1744         {0x0000009b, 0x00001000},
1745         {0x0000009f, 0x00b48000}
1746 };
1747
1748 #define HAWAII_IO_MC_REGS_SIZE 22
1749
1750 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1751 {
1752         {0x0000007d, 0x40000000},
1753         {0x0000007e, 0x40180304},
1754         {0x0000007f, 0x0000ff00},
1755         {0x00000081, 0x00000000},
1756         {0x00000083, 0x00000800},
1757         {0x00000086, 0x00000000},
1758         {0x00000087, 0x00000100},
1759         {0x00000088, 0x00020100},
1760         {0x00000089, 0x00000000},
1761         {0x0000008b, 0x00040000},
1762         {0x0000008c, 0x00000100},
1763         {0x0000008e, 0xff010000},
1764         {0x00000090, 0xffffefff},
1765         {0x00000091, 0xfff3efff},
1766         {0x00000092, 0xfff3efbf},
1767         {0x00000093, 0xf7ffffff},
1768         {0x00000094, 0xffffff7f},
1769         {0x00000095, 0x00000fff},
1770         {0x00000096, 0x00116fff},
1771         {0x00000097, 0x60010000},
1772         {0x00000098, 0x10010000},
1773         {0x0000009f, 0x00c79000}
1774 };
1775
1776
1777 /**
1778  * cik_srbm_select - select specific register instances
1779  *
1780  * @rdev: radeon_device pointer
1781  * @me: selected ME (micro engine)
1782  * @pipe: pipe
1783  * @queue: queue
1784  * @vmid: VMID
1785  *
1786  * Switches the currently active register instances.  Some
1787  * registers are instanced per VMID, others are instanced per
1788  * me/pipe/queue combination.
1789  */
1790 static void cik_srbm_select(struct radeon_device *rdev,
1791                             u32 me, u32 pipe, u32 queue, u32 vmid)
1792 {
1793         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794                              MEID(me & 0x3) |
1795                              VMID(vmid & 0xf) |
1796                              QUEUEID(queue & 0x7));
1797         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798 }
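
/*
 * Typical usage pattern (sketch only): callers take rdev->srbm_mutex,
 * select the me/pipe/queue (or VMID) whose instanced registers they want
 * to program, write them, and then restore the default selection.  The
 * register written below is a stand-in rather than a specific CP
 * register; the real call sites appear later in this file.
 */
static void cik_sketch_program_queue_reg(struct radeon_device *rdev,
					 u32 me, u32 pipe, u32 queue,
					 u32 reg, u32 val)
{
	mutex_lock(&rdev->srbm_mutex);
	cik_srbm_select(rdev, me, pipe, queue, 0);
	WREG32(reg, val);			/* hits the selected instance */
	cik_srbm_select(rdev, 0, 0, 0, 0);	/* back to the default instance */
	mutex_unlock(&rdev->srbm_mutex);
}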
1799
1800 /* ucode loading */
1801 /**
1802  * ci_mc_load_microcode - load MC ucode into the hw
1803  *
1804  * @rdev: radeon_device pointer
1805  *
1806  * Load the GDDR MC ucode into the hw (CIK).
1807  * Returns 0 on success, error on failure.
1808  */
1809 int ci_mc_load_microcode(struct radeon_device *rdev)
1810 {
1811         const __be32 *fw_data = NULL;
1812         const __le32 *new_fw_data = NULL;
1813         u32 running, blackout = 0, tmp;
1814         u32 *io_mc_regs = NULL;
1815         const __le32 *new_io_mc_regs = NULL;
1816         int i, regs_size, ucode_size;
1817
1818         if (!rdev->mc_fw)
1819                 return -EINVAL;
1820
1821         if (rdev->new_fw) {
1822                 const struct mc_firmware_header_v1_0 *hdr =
1823                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824
1825                 radeon_ucode_print_mc_hdr(&hdr->header);
1826
1827                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828                 new_io_mc_regs = (const __le32 *)
1829                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831                 new_fw_data = (const __le32 *)
1832                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833         } else {
1834                 ucode_size = rdev->mc_fw->size / 4;
1835
1836                 switch (rdev->family) {
1837                 case CHIP_BONAIRE:
1838                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840                         break;
1841                 case CHIP_HAWAII:
1842                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1844                         break;
1845                 default:
1846                         return -EINVAL;
1847                 }
1848                 fw_data = (const __be32 *)rdev->mc_fw->data;
1849         }
1850
1851         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852
1853         if (running == 0) {
1854                 if (running) {
1855                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857                 }
1858
1859                 /* reset the engine and set to writable */
1860                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862
1863                 /* load mc io regs */
1864                 for (i = 0; i < regs_size; i++) {
1865                         if (rdev->new_fw) {
1866                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868                         } else {
1869                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871                         }
1872                 }
1873
1874                 tmp = RREG32(MC_SEQ_MISC0);
1875                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880                 }
1881
1882                 /* load the MC ucode */
1883                 for (i = 0; i < ucode_size; i++) {
1884                         if (rdev->new_fw)
1885                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886                         else
1887                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888                 }
1889
1890                 /* put the engine back into the active state */
1891                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894
1895                 /* wait for training to complete */
1896                 for (i = 0; i < rdev->usec_timeout; i++) {
1897                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898                                 break;
1899                         udelay(1);
1900                 }
1901                 for (i = 0; i < rdev->usec_timeout; i++) {
1902                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903                                 break;
1904                         udelay(1);
1905                 }
1906
1907                 if (running)
1908                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909         }
1910
1911         return 0;
1912 }
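
/*
 * Sketch of a typical call site: MC ucode is only needed on dGPUs, so
 * the load is skipped on IGPs.  The real invocation happens from the
 * startup path later in this file; this fragment is illustrative only.
 */
static int cik_sketch_load_mc(struct radeon_device *rdev)
{
	int r;

	if (rdev->flags & RADEON_IS_IGP)
		return 0;	/* APUs have no MC firmware to load */

	r = ci_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}
	return 0;
}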
1913
1914 /**
1915  * cik_init_microcode - load ucode images from disk
1916  *
1917  * @rdev: radeon_device pointer
1918  *
1919  * Use the firmware interface to load the ucode images into
1920  * the driver (the images are not yet loaded into the hw).
1921  * Returns 0 on success, error on failure.
1922  */
1923 static int cik_init_microcode(struct radeon_device *rdev)
1924 {
1925         const char *chip_name;
1926         const char *new_chip_name;
1927         size_t pfp_req_size, me_req_size, ce_req_size,
1928                 mec_req_size, rlc_req_size, mc_req_size = 0,
1929                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930         char fw_name[30];
1931         int new_fw = 0;
1932         int err;
1933         int num_fw;
1934
1935         DRM_DEBUG("\n");
1936
1937         switch (rdev->family) {
1938         case CHIP_BONAIRE:
1939                 chip_name = "BONAIRE";
1940                 new_chip_name = "bonaire";
1941                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1943                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950                 num_fw = 8;
1951                 break;
1952         case CHIP_HAWAII:
1953                 chip_name = "HAWAII";
1954                 new_chip_name = "hawaii";
1955                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1957                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964                 num_fw = 8;
1965                 break;
1966         case CHIP_KAVERI:
1967                 chip_name = "KAVERI";
1968                 new_chip_name = "kaveri";
1969                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1971                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975                 num_fw = 7;
1976                 break;
1977         case CHIP_KABINI:
1978                 chip_name = "KABINI";
1979                 new_chip_name = "kabini";
1980                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1982                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986                 num_fw = 6;
1987                 break;
1988         case CHIP_MULLINS:
1989                 chip_name = "MULLINS";
1990                 new_chip_name = "mullins";
1991                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1993                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 num_fw = 6;
1998                 break;
1999         default: BUG();
2000         }
2001
2002         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003
2004         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006         if (err) {
2007                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009                 if (err)
2010                         goto out;
2011                 if (rdev->pfp_fw->size != pfp_req_size) {
2012                         printk(KERN_ERR
2013                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014                                rdev->pfp_fw->size, fw_name);
2015                         err = -EINVAL;
2016                         goto out;
2017                 }
2018         } else {
2019                 err = radeon_ucode_validate(rdev->pfp_fw);
2020                 if (err) {
2021                         printk(KERN_ERR
2022                                "cik_fw: validation failed for firmware \"%s\"\n",
2023                                fw_name);
2024                         goto out;
2025                 } else {
2026                         new_fw++;
2027                 }
2028         }
2029
2030         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032         if (err) {
2033                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035                 if (err)
2036                         goto out;
2037                 if (rdev->me_fw->size != me_req_size) {
2038                         printk(KERN_ERR
2039                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040                                rdev->me_fw->size, fw_name);
2041                         err = -EINVAL;
2042                 }
2043         } else {
2044                 err = radeon_ucode_validate(rdev->me_fw);
2045                 if (err) {
2046                         printk(KERN_ERR
2047                                "cik_fw: validation failed for firmware \"%s\"\n",
2048                                fw_name);
2049                         goto out;
2050                 } else {
2051                         new_fw++;
2052                 }
2053         }
2054
2055         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057         if (err) {
2058                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060                 if (err)
2061                         goto out;
2062                 if (rdev->ce_fw->size != ce_req_size) {
2063                         printk(KERN_ERR
2064                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065                                rdev->ce_fw->size, fw_name);
2066                         err = -EINVAL;
2067                 }
2068         } else {
2069                 err = radeon_ucode_validate(rdev->ce_fw);
2070                 if (err) {
2071                         printk(KERN_ERR
2072                                "cik_fw: validation failed for firmware \"%s\"\n",
2073                                fw_name);
2074                         goto out;
2075                 } else {
2076                         new_fw++;
2077                 }
2078         }
2079
2080         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082         if (err) {
2083                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085                 if (err)
2086                         goto out;
2087                 if (rdev->mec_fw->size != mec_req_size) {
2088                         printk(KERN_ERR
2089                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090                                rdev->mec_fw->size, fw_name);
2091                         err = -EINVAL;
2092                 }
2093         } else {
2094                 err = radeon_ucode_validate(rdev->mec_fw);
2095                 if (err) {
2096                         printk(KERN_ERR
2097                                "cik_fw: validation failed for firmware \"%s\"\n",
2098                                fw_name);
2099                         goto out;
2100                 } else {
2101                         new_fw++;
2102                 }
2103         }
2104
2105         if (rdev->family == CHIP_KAVERI) {
2106                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108                 if (err) {
2109                         goto out;
2110                 } else {
2111                         err = radeon_ucode_validate(rdev->mec2_fw);
2112                         if (err) {
2113                                 goto out;
2114                         } else {
2115                                 new_fw++;
2116                         }
2117                 }
2118         }
2119
2120         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122         if (err) {
2123                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125                 if (err)
2126                         goto out;
2127                 if (rdev->rlc_fw->size != rlc_req_size) {
2128                         printk(KERN_ERR
2129                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130                                rdev->rlc_fw->size, fw_name);
2131                         err = -EINVAL;
2132                 }
2133         } else {
2134                 err = radeon_ucode_validate(rdev->rlc_fw);
2135                 if (err) {
2136                         printk(KERN_ERR
2137                                "cik_fw: validation failed for firmware \"%s\"\n",
2138                                fw_name);
2139                         goto out;
2140                 } else {
2141                         new_fw++;
2142                 }
2143         }
2144
2145         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147         if (err) {
2148                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150                 if (err)
2151                         goto out;
2152                 if (rdev->sdma_fw->size != sdma_req_size) {
2153                         printk(KERN_ERR
2154                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155                                rdev->sdma_fw->size, fw_name);
2156                         err = -EINVAL;
2157                 }
2158         } else {
2159                 err = radeon_ucode_validate(rdev->sdma_fw);
2160                 if (err) {
2161                         printk(KERN_ERR
2162                                "cik_fw: validation failed for firmware \"%s\"\n",
2163                                fw_name);
2164                         goto out;
2165                 } else {
2166                         new_fw++;
2167                 }
2168         }
2169
2170         /* No SMC, MC ucode on APUs */
2171         if (!(rdev->flags & RADEON_IS_IGP)) {
2172                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174                 if (err) {
2175                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177                         if (err) {
2178                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180                                 if (err)
2181                                         goto out;
2182                         }
2183                         if ((rdev->mc_fw->size != mc_req_size) &&
2184                             (rdev->mc_fw->size != mc2_req_size)){
2185                                 printk(KERN_ERR
2186                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187                                        rdev->mc_fw->size, fw_name);
2188                                 err = -EINVAL;
2189                         }
2190                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191                 } else {
2192                         err = radeon_ucode_validate(rdev->mc_fw);
2193                         if (err) {
2194                                 printk(KERN_ERR
2195                                        "cik_fw: validation failed for firmware \"%s\"\n",
2196                                        fw_name);
2197                                 goto out;
2198                         } else {
2199                                 new_fw++;
2200                         }
2201                 }
2202
2203                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205                 if (err) {
2206                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208                         if (err) {
2209                                 printk(KERN_ERR
2210                                        "smc: error loading firmware \"%s\"\n",
2211                                        fw_name);
2212                                 release_firmware(rdev->smc_fw);
2213                                 rdev->smc_fw = NULL;
2214                                 err = 0;
2215                         } else if (rdev->smc_fw->size != smc_req_size) {
2216                                 printk(KERN_ERR
2217                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218                                        rdev->smc_fw->size, fw_name);
2219                                 err = -EINVAL;
2220                         }
2221                 } else {
2222                         err = radeon_ucode_validate(rdev->smc_fw);
2223                         if (err) {
2224                                 printk(KERN_ERR
2225                                        "cik_fw: validation failed for firmware \"%s\"\n",
2226                                        fw_name);
2227                                 goto out;
2228                         } else {
2229                                 new_fw++;
2230                         }
2231                 }
2232         }
2233
2234         if (new_fw == 0) {
2235                 rdev->new_fw = false;
2236         } else if (new_fw < num_fw) {
2237                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238                 err = -EINVAL;
2239         } else {
2240                 rdev->new_fw = true;
2241         }
2242
2243 out:
2244         if (err) {
2245                 if (err != -EINVAL)
2246                         printk(KERN_ERR
2247                                "cik_cp: Failed to load firmware \"%s\"\n",
2248                                fw_name);
2249                 release_firmware(rdev->pfp_fw);
2250                 rdev->pfp_fw = NULL;
2251                 release_firmware(rdev->me_fw);
2252                 rdev->me_fw = NULL;
2253                 release_firmware(rdev->ce_fw);
2254                 rdev->ce_fw = NULL;
2255                 release_firmware(rdev->mec_fw);
2256                 rdev->mec_fw = NULL;
2257                 release_firmware(rdev->mec2_fw);
2258                 rdev->mec2_fw = NULL;
2259                 release_firmware(rdev->rlc_fw);
2260                 rdev->rlc_fw = NULL;
2261                 release_firmware(rdev->sdma_fw);
2262                 rdev->sdma_fw = NULL;
2263                 release_firmware(rdev->mc_fw);
2264                 rdev->mc_fw = NULL;
2265                 release_firmware(rdev->smc_fw);
2266                 rdev->smc_fw = NULL;
2267         }
2268         return err;
2269 }
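
/*
 * Sketch of a typical call site: the init path requests the images once
 * and reuses them across resets/resumes, so the load is only attempted
 * when the pfp image (taken here as a proxy for the whole set) is not
 * already cached.  Illustrative only; the real check in the init code
 * later in this file may test a different subset of images.
 */
static int cik_sketch_request_ucode(struct radeon_device *rdev)
{
	int r;

	if (!rdev->pfp_fw) {
		r = cik_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}
	return 0;
}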
2270
2271 /*
2272  * Core functions
2273  */
2274 /**
2275  * cik_tiling_mode_table_init - init the hw tiling table
2276  *
2277  * @rdev: radeon_device pointer
2278  *
2279  * Starting with SI, the tiling setup is done globally in a
2280  * set of 32 tiling modes.  Rather than selecting each set of
2281  * parameters per surface as on older asics, we just select
2282  * which index in the tiling table we want to use, and the
2283  * surface uses those parameters (CIK).
2284  */
2285 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286 {
2287         const u32 num_tile_mode_states = 32;
2288         const u32 num_secondary_tile_mode_states = 16;
2289         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290         u32 num_pipe_configs;
2291         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292                 rdev->config.cik.max_shader_engines;
2293
2294         switch (rdev->config.cik.mem_row_size_in_kb) {
2295         case 1:
2296                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297                 break;
2298         case 2:
2299         default:
2300                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301                 break;
2302         case 4:
2303                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304                 break;
2305         }
2306
2307         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2308         if (num_pipe_configs > 8)
2309                 num_pipe_configs = 16;
2310
2311         if (num_pipe_configs == 16) {
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313                         switch (reg_offset) {
2314                         case 0:
2315                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319                                 break;
2320                         case 1:
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325                                 break;
2326                         case 2:
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331                                 break;
2332                         case 3:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337                                 break;
2338                         case 4:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                                  TILE_SPLIT(split_equal_to_row_size));
2343                                 break;
2344                         case 5:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                                 break;
2349                         case 6:
2350                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354                                 break;
2355                         case 7:
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                                  TILE_SPLIT(split_equal_to_row_size));
2360                                 break;
2361                         case 8:
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364                                 break;
2365                         case 9:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369                                 break;
2370                         case 10:
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                                 break;
2376                         case 11:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 12:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 13:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392                                 break;
2393                         case 14:
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398                                 break;
2399                         case 16:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                                 break;
2405                         case 17:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                 break;
2411                         case 27:
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415                                 break;
2416                         case 28:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 29:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                                 break;
2428                         case 30:
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                                 break;
2434                         default:
2435                                 gb_tile_moden = 0;
2436                                 break;
2437                         }
2438                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440                 }
2441                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442                         switch (reg_offset) {
2443                         case 0:
2444                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2448                                 break;
2449                         case 1:
2450                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2454                                 break;
2455                         case 2:
2456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2460                                 break;
2461                         case 3:
2462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2466                                 break;
2467                         case 4:
2468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                                 break;
2473                         case 5:
2474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2478                                 break;
2479                         case 6:
2480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2484                                 break;
2485                         case 8:
2486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2490                                 break;
2491                         case 9:
2492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2496                                 break;
2497                         case 10:
2498                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2502                                 break;
2503                         case 11:
2504                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2508                                 break;
2509                         case 12:
2510                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2514                                 break;
2515                         case 13:
2516                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2520                                 break;
2521                         case 14:
2522                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2526                                 break;
2527                         default:
2528                                 gb_tile_moden = 0;
2529                                 break;
2530                         }
2531                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533                 }
2534         } else if (num_pipe_configs == 8) {
2535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536                         switch (reg_offset) {
2537                         case 0:
2538                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542                                 break;
2543                         case 1:
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548                                 break;
2549                         case 2:
2550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554                                 break;
2555                         case 3:
2556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560                                 break;
2561                         case 4:
2562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                                  TILE_SPLIT(split_equal_to_row_size));
2566                                 break;
2567                         case 5:
2568                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                                 break;
2572                         case 6:
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577                                 break;
2578                         case 7:
2579                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                                  TILE_SPLIT(split_equal_to_row_size));
2583                                 break;
2584                         case 8:
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587                                 break;
2588                         case 9:
2589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592                                 break;
2593                         case 10:
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                                 break;
2599                         case 11:
2600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                                 break;
2605                         case 12:
2606                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                                 break;
2611                         case 13:
2612                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615                                 break;
2616                         case 14:
2617                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                                 break;
2622                         case 16:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                                 break;
2628                         case 17:
2629                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                 break;
2634                         case 27:
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638                                 break;
2639                         case 28:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 29:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                 break;
2651                         case 30:
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                 break;
2657                         default:
2658                                 gb_tile_moden = 0;
2659                                 break;
2660                         }
2661                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663                 }
2664                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665                         switch (reg_offset) {
2666                         case 0:
2667                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                                 break;
2672                         case 1:
2673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                                 break;
2678                         case 2:
2679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                                 break;
2684                         case 3:
2685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2689                                 break;
2690                         case 4:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2695                                 break;
2696                         case 5:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2701                                 break;
2702                         case 6:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2707                                 break;
2708                         case 8:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2713                                 break;
2714                         case 9:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 10:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2725                                 break;
2726                         case 11:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2731                                 break;
2732                         case 12:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2737                                 break;
2738                         case 13:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2743                                 break;
2744                         case 14:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2749                                 break;
2750                         default:
2751                                 gb_tile_moden = 0;
2752                                 break;
2753                         }
2754                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756                 }
2757         } else if (num_pipe_configs == 4) {
2758                 if (num_rbs == 4) {
2759                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760                                 switch (reg_offset) {
2761                                 case 0:
2762                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766                                         break;
2767                                 case 1:
2768                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772                                         break;
2773                                 case 2:
2774                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778                                         break;
2779                                 case 3:
2780                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784                                         break;
2785                                 case 4:
2786                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                                          TILE_SPLIT(split_equal_to_row_size));
2790                                         break;
2791                                 case 5:
2792                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                                         break;
2796                                 case 6:
2797                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801                                         break;
2802                                 case 7:
2803                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                                          TILE_SPLIT(split_equal_to_row_size));
2807                                         break;
2808                                 case 8:
2809                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811                                         break;
2812                                 case 9:
2813                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816                                         break;
2817                                 case 10:
2818                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                                         break;
2823                                 case 11:
2824                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828                                         break;
2829                                 case 12:
2830                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                                         break;
2835                                 case 13:
2836                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839                                         break;
2840                                 case 14:
2841                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                                         break;
2846                                 case 16:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                                         break;
2852                                 case 17:
2853                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                                         break;
2858                                 case 27:
2859                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862                                         break;
2863                                 case 28:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 29:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                                         break;
2875                                 case 30:
2876                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                                         break;
2881                                 default:
2882                                         gb_tile_moden = 0;
2883                                         break;
2884                                 }
2885                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887                         }
2888                 } else if (num_rbs < 4) {
2889                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890                                 switch (reg_offset) {
2891                                 case 0:
2892                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896                                         break;
2897                                 case 1:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902                                         break;
2903                                 case 2:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                                         break;
2909                                 case 3:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914                                         break;
2915                                 case 4:
2916                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919                                                          TILE_SPLIT(split_equal_to_row_size));
2920                                         break;
2921                                 case 5:
2922                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                                         break;
2926                                 case 6:
2927                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931                                         break;
2932                                 case 7:
2933                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936                                                          TILE_SPLIT(split_equal_to_row_size));
2937                                         break;
2938                                 case 8:
2939                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941                                         break;
2942                                 case 9:
2943                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946                                         break;
2947                                 case 10:
2948                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                                         break;
2953                                 case 11:
2954                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                                         break;
2959                                 case 12:
2960                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964                                         break;
2965                                 case 13:
2966                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969                                         break;
2970                                 case 14:
2971                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975                                         break;
2976                                 case 16:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                                         break;
2982                                 case 17:
2983                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                                         break;
2988                                 case 27:
2989                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992                                         break;
2993                                 case 28:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 29:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004                                         break;
3005                                 case 30:
3006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                                         break;
3011                                 default:
3012                                         gb_tile_moden = 0;
3013                                         break;
3014                                 }
3015                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017                         }
3018                 }
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020                         switch (reg_offset) {
3021                         case 0:
3022                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3026                                 break;
3027                         case 1:
3028                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3032                                 break;
3033                         case 2:
3034                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3038                                 break;
3039                         case 3:
3040                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3044                                 break;
3045                         case 4:
3046                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3050                                 break;
3051                         case 5:
3052                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3056                                 break;
3057                         case 6:
3058                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3062                                 break;
3063                         case 8:
3064                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3068                                 break;
3069                         case 9:
3070                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3074                                 break;
3075                         case 10:
3076                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3080                                 break;
3081                         case 11:
3082                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3086                                 break;
3087                         case 12:
3088                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3092                                 break;
3093                         case 13:
3094                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3098                                 break;
3099                         case 14:
3100                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3104                                 break;
3105                         default:
3106                                 gb_tile_moden = 0;
3107                                 break;
3108                         }
3109                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111                 }
3112         } else if (num_pipe_configs == 2) {
3113                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114                         switch (reg_offset) {
3115                         case 0:
3116                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120                                 break;
3121                         case 1:
3122                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126                                 break;
3127                         case 2:
3128                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132                                 break;
3133                         case 3:
3134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138                                 break;
3139                         case 4:
3140                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                                  TILE_SPLIT(split_equal_to_row_size));
3144                                 break;
3145                         case 5:
3146                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149                                 break;
3150                         case 6:
3151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155                                 break;
3156                         case 7:
3157                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                                  TILE_SPLIT(split_equal_to_row_size));
3161                                 break;
3162                         case 8:
3163                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2));
3165                                 break;
3166                         case 9:
3167                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169                                                  PIPE_CONFIG(ADDR_SURF_P2));
3170                                 break;
3171                         case 10:
3172                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                                 break;
3177                         case 11:
3178                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                                 break;
3183                         case 12:
3184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188                                 break;
3189                         case 13:
3190                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193                                 break;
3194                         case 14:
3195                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                                 break;
3200                         case 16:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3204                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                                 break;
3206                         case 17:
3207                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                                 break;
3212                         case 27:
3213                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215                                                  PIPE_CONFIG(ADDR_SURF_P2));
3216                                 break;
3217                         case 28:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 29:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                                 break;
3229                         case 30:
3230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234                                 break;
3235                         default:
3236                                 gb_tile_moden = 0;
3237                                 break;
3238                         }
3239                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241                 }
3242                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243                         switch (reg_offset) {
3244                         case 0:
3245                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                                 break;
3250                         case 1:
3251                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3255                                 break;
3256                         case 2:
3257                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3261                                 break;
3262                         case 3:
3263                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3267                                 break;
3268                         case 4:
3269                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3273                                 break;
3274                         case 5:
3275                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3279                                 break;
3280                         case 6:
3281                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3285                                 break;
3286                         case 8:
3287                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3291                                 break;
3292                         case 9:
3293                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3297                                 break;
3298                         case 10:
3299                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3303                                 break;
3304                         case 11:
3305                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                                 break;
3310                         case 12:
3311                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3315                                 break;
3316                         case 13:
3317                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3321                                 break;
3322                         case 14:
3323                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3327                                 break;
3328                         default:
3329                                 gb_tile_moden = 0;
3330                                 break;
3331                         }
3332                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334                 }
3335         } else
3336                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3337 }
3338
3339 /**
3340  * cik_select_se_sh - select which SE, SH to address
3341  *
3342  * @rdev: radeon_device pointer
3343  * @se_num: shader engine to address
3344  * @sh_num: sh block to address
3345  *
3346  * Select which SE, SH combinations to address. Certain
3347  * registers are instanced per SE or SH.  0xffffffff means
3348  * broadcast to all SEs or SHs (CIK).
3349  */
3350 static void cik_select_se_sh(struct radeon_device *rdev,
3351                              u32 se_num, u32 sh_num)
3352 {
3353         u32 data = INSTANCE_BROADCAST_WRITES;
3354
3355         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357         else if (se_num == 0xffffffff)
3358                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359         else if (sh_num == 0xffffffff)
3360                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361         else
3362                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363         WREG32(GRBM_GFX_INDEX, data);
3364 }
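/*
 * Usage note (illustrative, based on the callers in this file): passing
 * 0xffffffff for se_num and/or sh_num broadcasts subsequent register
 * writes to every SE/SH instance, e.g.
 *
 *     cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * as cik_setup_rb() and cik_gpu_init() do below.  Callers in this file
 * hold rdev->grbm_idx_mutex around the select/access/deselect sequence.
 */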
3365
3366 /**
3367  * cik_create_bitmask - create a bitmask
3368  *
3369  * @bit_width: length of the mask
3370  *
3371  * create a variable length bit mask (CIK).
3372  * Returns the bitmask.
3373  */
3374 static u32 cik_create_bitmask(u32 bit_width)
3375 {
3376         u32 i, mask = 0;
3377
3378         for (i = 0; i < bit_width; i++) {
3379                 mask <<= 1;
3380                 mask |= 1;
3381         }
3382         return mask;
3383 }
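/*
 * Worked example (follows directly from the loop above):
 * cik_create_bitmask(4) shifts in four 1s and returns 0xf, i.e. the same
 * value as ((1 << bit_width) - 1) for non-zero bit_width, and
 * cik_create_bitmask(0) returns 0.
 */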
3384
3385 /**
3386  * cik_get_rb_disabled - computes the mask of disabled RBs
3387  *
3388  * @rdev: radeon_device pointer
3389  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3391  * @sh_per_se: number of SH blocks per SE for the asic
3392  *
3393  * Calculates the bitmask of disabled RBs (CIK).
3394  * Returns the disabled RB bitmask.
3395  */
3396 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397                               u32 max_rb_num_per_se,
3398                               u32 sh_per_se)
3399 {
3400         u32 data, mask;
3401
3402         data = RREG32(CC_RB_BACKEND_DISABLE);
3403         if (data & 1)
3404                 data &= BACKEND_DISABLE_MASK;
3405         else
3406                 data = 0;
3407         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408
3409         data >>= BACKEND_DISABLE_SHIFT;
3410
3411         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412
3413         return data & mask;
3414 }
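/*
 * Example: with max_rb_num_per_se = 4 and sh_per_se = 1 (the Hawaii
 * configuration set up in cik_gpu_init() below), the mask is 0xf, so up
 * to four RBs per SH can be reported as disabled by the fuse/user
 * registers read above.
 */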
3415
3416 /**
3417  * cik_setup_rb - setup the RBs on the asic
3418  *
3419  * @rdev: radeon_device pointer
3420  * @se_num: number of SEs (shader engines) for the asic
3421  * @sh_per_se: number of SH blocks per SE for the asic
3422  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3423  *
3424  * Configures per-SE/SH RB registers (CIK).
3425  */
3426 static void cik_setup_rb(struct radeon_device *rdev,
3427                          u32 se_num, u32 sh_per_se,
3428                          u32 max_rb_num_per_se)
3429 {
3430         int i, j;
3431         u32 data, mask;
3432         u32 disabled_rbs = 0;
3433         u32 enabled_rbs = 0;
3434
3435         mutex_lock(&rdev->grbm_idx_mutex);
3436         for (i = 0; i < se_num; i++) {
3437                 for (j = 0; j < sh_per_se; j++) {
3438                         cik_select_se_sh(rdev, i, j);
3439                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440                         if (rdev->family == CHIP_HAWAII)
3441                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442                         else
3443                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444                 }
3445         }
3446         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3447         mutex_unlock(&rdev->grbm_idx_mutex);
3448
3449         mask = 1;
3450         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451                 if (!(disabled_rbs & mask))
3452                         enabled_rbs |= mask;
3453                 mask <<= 1;
3454         }
3455
3456         rdev->config.cik.backend_enable_mask = enabled_rbs;
3457
3458         mutex_lock(&rdev->grbm_idx_mutex);
3459         for (i = 0; i < se_num; i++) {
3460                 cik_select_se_sh(rdev, i, 0xffffffff);
3461                 data = 0;
3462                 for (j = 0; j < sh_per_se; j++) {
3463                         switch (enabled_rbs & 3) {
3464                         case 0:
3465                                 if (j == 0)
3466                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467                                 else
3468                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469                                 break;
3470                         case 1:
3471                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472                                 break;
3473                         case 2:
3474                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475                                 break;
3476                         case 3:
3477                         default:
3478                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479                                 break;
3480                         }
3481                         enabled_rbs >>= 2;
3482                 }
3483                 WREG32(PA_SC_RASTER_CONFIG, data);
3484         }
3485         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3486         mutex_unlock(&rdev->grbm_idx_mutex);
3487 }
3488
3489 /**
3490  * cik_gpu_init - setup the 3D engine
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Configures the 3D engine and tiling configuration
3495  * registers so that the 3D engine is usable.
3496  */
3497 static void cik_gpu_init(struct radeon_device *rdev)
3498 {
3499         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500         u32 mc_shared_chmap, mc_arb_ramcfg;
3501         u32 hdp_host_path_cntl;
3502         u32 tmp;
3503         int i, j;
3504
3505         switch (rdev->family) {
3506         case CHIP_BONAIRE:
3507                 rdev->config.cik.max_shader_engines = 2;
3508                 rdev->config.cik.max_tile_pipes = 4;
3509                 rdev->config.cik.max_cu_per_sh = 7;
3510                 rdev->config.cik.max_sh_per_se = 1;
3511                 rdev->config.cik.max_backends_per_se = 2;
3512                 rdev->config.cik.max_texture_channel_caches = 4;
3513                 rdev->config.cik.max_gprs = 256;
3514                 rdev->config.cik.max_gs_threads = 32;
3515                 rdev->config.cik.max_hw_contexts = 8;
3516
3517                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522                 break;
3523         case CHIP_HAWAII:
3524                 rdev->config.cik.max_shader_engines = 4;
3525                 rdev->config.cik.max_tile_pipes = 16;
3526                 rdev->config.cik.max_cu_per_sh = 11;
3527                 rdev->config.cik.max_sh_per_se = 1;
3528                 rdev->config.cik.max_backends_per_se = 4;
3529                 rdev->config.cik.max_texture_channel_caches = 16;
3530                 rdev->config.cik.max_gprs = 256;
3531                 rdev->config.cik.max_gs_threads = 32;
3532                 rdev->config.cik.max_hw_contexts = 8;
3533
3534                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539                 break;
3540         case CHIP_KAVERI:
3541                 rdev->config.cik.max_shader_engines = 1;
3542                 rdev->config.cik.max_tile_pipes = 4;
3543                 if ((rdev->pdev->device == 0x1304) ||
3544                     (rdev->pdev->device == 0x1305) ||
3545                     (rdev->pdev->device == 0x130C) ||
3546                     (rdev->pdev->device == 0x130F) ||
3547                     (rdev->pdev->device == 0x1310) ||
3548                     (rdev->pdev->device == 0x1311) ||
3549                     (rdev->pdev->device == 0x131C)) {
3550                         rdev->config.cik.max_cu_per_sh = 8;
3551                         rdev->config.cik.max_backends_per_se = 2;
3552                 } else if ((rdev->pdev->device == 0x1309) ||
3553                            (rdev->pdev->device == 0x130A) ||
3554                            (rdev->pdev->device == 0x130D) ||
3555                            (rdev->pdev->device == 0x1313) ||
3556                            (rdev->pdev->device == 0x131D)) {
3557                         rdev->config.cik.max_cu_per_sh = 6;
3558                         rdev->config.cik.max_backends_per_se = 2;
3559                 } else if ((rdev->pdev->device == 0x1306) ||
3560                            (rdev->pdev->device == 0x1307) ||
3561                            (rdev->pdev->device == 0x130B) ||
3562                            (rdev->pdev->device == 0x130E) ||
3563                            (rdev->pdev->device == 0x1315) ||
3564                            (rdev->pdev->device == 0x1318) ||
3565                            (rdev->pdev->device == 0x131B)) {
3566                         rdev->config.cik.max_cu_per_sh = 4;
3567                         rdev->config.cik.max_backends_per_se = 1;
3568                 } else {
3569                         rdev->config.cik.max_cu_per_sh = 3;
3570                         rdev->config.cik.max_backends_per_se = 1;
3571                 }
3572                 rdev->config.cik.max_sh_per_se = 1;
3573                 rdev->config.cik.max_texture_channel_caches = 4;
3574                 rdev->config.cik.max_gprs = 256;
3575                 rdev->config.cik.max_gs_threads = 16;
3576                 rdev->config.cik.max_hw_contexts = 8;
3577
3578                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583                 break;
3584         case CHIP_KABINI:
3585         case CHIP_MULLINS:
3586         default:
3587                 rdev->config.cik.max_shader_engines = 1;
3588                 rdev->config.cik.max_tile_pipes = 2;
3589                 rdev->config.cik.max_cu_per_sh = 2;
3590                 rdev->config.cik.max_sh_per_se = 1;
3591                 rdev->config.cik.max_backends_per_se = 1;
3592                 rdev->config.cik.max_texture_channel_caches = 2;
3593                 rdev->config.cik.max_gprs = 256;
3594                 rdev->config.cik.max_gs_threads = 16;
3595                 rdev->config.cik.max_hw_contexts = 8;
3596
3597                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602                 break;
3603         }
3604
3605         /* Initialize HDP */
3606         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607                 WREG32((0x2c14 + j), 0x00000000);
3608                 WREG32((0x2c18 + j), 0x00000000);
3609                 WREG32((0x2c1c + j), 0x00000000);
3610                 WREG32((0x2c20 + j), 0x00000000);
3611                 WREG32((0x2c24 + j), 0x00000000);
3612         }
3613
3614         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615
3616         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617
3618         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620
3621         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622         rdev->config.cik.mem_max_burst_length_bytes = 256;
3623         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625         if (rdev->config.cik.mem_row_size_in_kb > 4)
3626                 rdev->config.cik.mem_row_size_in_kb = 4;
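        /*
         * Worked example: NOOFCOLS = 0 gives 4 * 2^8 = 1024 bytes (1 KB)
         * per row, NOOFCOLS = 2 gives 4 KB, and anything larger is
         * clamped to the 4 KB maximum above.
         */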
3627         /* XXX use MC settings? */
3628         rdev->config.cik.shader_engine_tile_size = 32;
3629         rdev->config.cik.num_gpus = 1;
3630         rdev->config.cik.multi_gpu_tile_size = 64;
3631
3632         /* fix up row size */
3633         gb_addr_config &= ~ROW_SIZE_MASK;
3634         switch (rdev->config.cik.mem_row_size_in_kb) {
3635         case 1:
3636         default:
3637                 gb_addr_config |= ROW_SIZE(0);
3638                 break;
3639         case 2:
3640                 gb_addr_config |= ROW_SIZE(1);
3641                 break;
3642         case 4:
3643                 gb_addr_config |= ROW_SIZE(2);
3644                 break;
3645         }
3646
3647         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3648          * not have bank info, so create a custom tiling dword.
3649          * bits 3:0   num_pipes
3650          * bits 7:4   num_banks
3651          * bits 11:8  group_size
3652          * bits 15:12 row_size
3653          */
3654         rdev->config.cik.tile_config = 0;
3655         switch (rdev->config.cik.num_tile_pipes) {
3656         case 1:
3657                 rdev->config.cik.tile_config |= (0 << 0);
3658                 break;
3659         case 2:
3660                 rdev->config.cik.tile_config |= (1 << 0);
3661                 break;
3662         case 4:
3663                 rdev->config.cik.tile_config |= (2 << 0);
3664                 break;
3665         case 8:
3666         default:
3667                 /* XXX what about 12? */
3668                 rdev->config.cik.tile_config |= (3 << 0);
3669                 break;
3670         }
3671         rdev->config.cik.tile_config |=
3672                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673         rdev->config.cik.tile_config |=
3674                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675         rdev->config.cik.tile_config |=
3676                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
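        /*
         * Example packing (actual values depend on the board): a 4-pipe
         * Bonaire stores 2 in bits 3:0 per the switch above, while bits
         * 7:4, 11:8 and 15:12 are filled from the NOOFBANK, pipe
         * interleave and row size fields just read back, presumably so
         * the whole tiling layout can be handed out as a single dword.
         */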
3677
3678         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686
3687         cik_tiling_mode_table_init(rdev);
3688
3689         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690                      rdev->config.cik.max_sh_per_se,
3691                      rdev->config.cik.max_backends_per_se);
3692
3693         rdev->config.cik.active_cus = 0;
3694         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696                         rdev->config.cik.active_cus +=
3697                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698                 }
3699         }
3700
3701         /* set HW defaults for 3D engine */
3702         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703
3704         mutex_lock(&rdev->grbm_idx_mutex);
3705         /*
3706          * making sure that the following register writes will be broadcast
3707          * to all the shaders
3708          */
3709         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3710         WREG32(SX_DEBUG_1, 0x20);
3711
3712         WREG32(TA_CNTL_AUX, 0x00010000);
3713
3714         tmp = RREG32(SPI_CONFIG_CNTL);
3715         tmp |= 0x03000000;
3716         WREG32(SPI_CONFIG_CNTL, tmp);
3717
3718         WREG32(SQ_CONFIG, 1);
3719
3720         WREG32(DB_DEBUG, 0);
3721
3722         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723         tmp |= 0x00000400;
3724         WREG32(DB_DEBUG2, tmp);
3725
3726         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727         tmp |= 0x00020200;
3728         WREG32(DB_DEBUG3, tmp);
3729
3730         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731         tmp |= 0x00018208;
3732         WREG32(CB_HW_CONTROL, tmp);
3733
3734         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735
3736         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740
3741         WREG32(VGT_NUM_INSTANCES, 1);
3742
3743         WREG32(CP_PERFMON_CNTL, 0);
3744
3745         WREG32(SQ_CONFIG, 0);
3746
3747         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748                                           FORCE_EOV_MAX_REZ_CNT(255)));
3749
3750         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752
3753         WREG32(VGT_GS_VERTEX_REUSE, 16);
3754         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755
3756         tmp = RREG32(HDP_MISC_CNTL);
3757         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758         WREG32(HDP_MISC_CNTL, tmp);
3759
3760         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762
3763         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3765         mutex_unlock(&rdev->grbm_idx_mutex);
3766
3767         udelay(50);
3768 }
3769
3770 /*
3771  * GPU scratch registers helpers function.
3772  */
3773 /**
3774  * cik_scratch_init - setup driver info for CP scratch regs
3775  *
3776  * @rdev: radeon_device pointer
3777  *
3778  * Set up the number and offset of the CP scratch registers.
3779  * NOTE: use of CP scratch registers is a legacy interface and
3780  * is not used by default on newer asics (r6xx+).  On newer asics,
3781  * memory buffers are used for fences rather than scratch regs.
3782  */
3783 static void cik_scratch_init(struct radeon_device *rdev)
3784 {
3785         int i;
3786
3787         rdev->scratch.num_reg = 7;
3788         rdev->scratch.reg_base = SCRATCH_REG0;
3789         for (i = 0; i < rdev->scratch.num_reg; i++) {
3790                 rdev->scratch.free[i] = true;
3791                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792         }
3793 }
3794
3795 /**
3796  * cik_ring_test - basic gfx ring test
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ring: radeon_ring structure holding ring information
3800  *
3801  * Allocate a scratch register and write to it using the gfx ring (CIK).
3802  * Provides a basic gfx ring test to verify that the ring is working.
3803  * Used by cik_cp_gfx_resume().
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808         uint32_t scratch;
3809         uint32_t tmp = 0;
3810         unsigned i;
3811         int r;
3812
3813         r = radeon_scratch_get(rdev, &scratch);
3814         if (r) {
3815                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816                 return r;
3817         }
3818         WREG32(scratch, 0xCAFEDEAD);
3819         r = radeon_ring_lock(rdev, ring, 3);
3820         if (r) {
3821                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822                 radeon_scratch_free(rdev, scratch);
3823                 return r;
3824         }
3825         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827         radeon_ring_write(ring, 0xDEADBEEF);
3828         radeon_ring_unlock_commit(rdev, ring, false);
3829
3830         for (i = 0; i < rdev->usec_timeout; i++) {
3831                 tmp = RREG32(scratch);
3832                 if (tmp == 0xDEADBEEF)
3833                         break;
3834                 DRM_UDELAY(1);
3835         }
3836         if (i < rdev->usec_timeout) {
3837                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838         } else {
3839                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840                           ring->idx, scratch, tmp);
3841                 r = -EINVAL;
3842         }
3843         radeon_scratch_free(rdev, scratch);
3844         return r;
3845 }
3846
3847 /**
3848  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849  *
3850  * @rdev: radeon_device pointer
3851  * @ridx: radeon ring index
3852  *
3853  * Emits an hdp flush on the cp.
3854  */
3855 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856                                        int ridx)
3857 {
3858         struct radeon_ring *ring = &rdev->ring[ridx];
3859         u32 ref_and_mask;
3860
3861         switch (ring->idx) {
3862         case CAYMAN_RING_TYPE_CP1_INDEX:
3863         case CAYMAN_RING_TYPE_CP2_INDEX:
3864         default:
3865                 switch (ring->me) {
3866                 case 0:
3867                         ref_and_mask = CP2 << ring->pipe;
3868                         break;
3869                 case 1:
3870                         ref_and_mask = CP6 << ring->pipe;
3871                         break;
3872                 default:
3873                         return;
3874                 }
3875                 break;
3876         case RADEON_RING_TYPE_GFX_INDEX:
3877                 ref_and_mask = CP0;
3878                 break;
3879         }
3880
3881         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887         radeon_ring_write(ring, ref_and_mask);
3888         radeon_ring_write(ring, ref_and_mask);
3889         radeon_ring_write(ring, 0x20); /* poll interval */
3890 }
3891
3892 /**
3893  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894  *
3895  * @rdev: radeon_device pointer
3896  * @fence: radeon fence object
3897  *
3898  * Emits a fence sequence number on the gfx ring and flushes
3899  * GPU caches.
3900  */
3901 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902                              struct radeon_fence *fence)
3903 {
3904         struct radeon_ring *ring = &rdev->ring[fence->ring];
3905         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906
3907         /* EVENT_WRITE_EOP - flush caches, send int */
3908         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910                                  EOP_TC_ACTION_EN |
3911                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912                                  EVENT_INDEX(5)));
3913         radeon_ring_write(ring, addr & 0xfffffffc);
3914         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915         radeon_ring_write(ring, fence->seq);
3916         radeon_ring_write(ring, 0);
3917 }
3918
3919 /**
3920  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921  *
3922  * @rdev: radeon_device pointer
3923  * @fence: radeon fence object
3924  *
3925  * Emits a fence sequence number on the compute ring and flushes
3926  * GPU caches.
3927  */
3928 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929                                  struct radeon_fence *fence)
3930 {
3931         struct radeon_ring *ring = &rdev->ring[fence->ring];
3932         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933
3934         /* RELEASE_MEM - flush caches, send int */
3935         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937                                  EOP_TC_ACTION_EN |
3938                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939                                  EVENT_INDEX(5)));
3940         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941         radeon_ring_write(ring, addr & 0xfffffffc);
3942         radeon_ring_write(ring, upper_32_bits(addr));
3943         radeon_ring_write(ring, fence->seq);
3944         radeon_ring_write(ring, 0);
3945 }
3946
3947 /**
3948  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949  *
3950  * @rdev: radeon_device pointer
3951  * @ring: radeon ring buffer object
3952  * @semaphore: radeon semaphore object
3953  * @emit_wait: Is this a semaphore wait?
3954  *
3955  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956  * from running ahead of semaphore waits.
3957  */
3958 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959                              struct radeon_ring *ring,
3960                              struct radeon_semaphore *semaphore,
3961                              bool emit_wait)
3962 {
3963         uint64_t addr = semaphore->gpu_addr;
3964         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965
3966         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967         radeon_ring_write(ring, lower_32_bits(addr));
3968         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969
3970         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971                 /* Prevent the PFP from running ahead of the semaphore wait */
3972                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973                 radeon_ring_write(ring, 0x0);
3974         }
3975
3976         return true;
3977 }
3978
3979 /**
3980  * cik_copy_cpdma - copy pages using the CP DMA engine
3981  *
3982  * @rdev: radeon_device pointer
3983  * @src_offset: src GPU address
3984  * @dst_offset: dst GPU address
3985  * @num_gpu_pages: number of GPU pages to xfer
3986  * @resv: reservation object to sync to
3987  *
3988  * Copy GPU pages using the CP DMA engine (CIK+).
3989  * Used by the radeon ttm implementation to move pages if
3990  * registered as the asic copy callback.
3991  */
3992 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3993                                     uint64_t src_offset, uint64_t dst_offset,
3994                                     unsigned num_gpu_pages,
3995                                     struct reservation_object *resv)
3996 {
3997         struct radeon_semaphore *sem = NULL;
3998         struct radeon_fence *fence;
3999         int ring_index = rdev->asic->copy.blit_ring_index;
4000         struct radeon_ring *ring = &rdev->ring[ring_index];
4001         u32 size_in_bytes, cur_size_in_bytes, control;
4002         int i, num_loops;
4003         int r = 0;
4004
4005         r = radeon_semaphore_create(rdev, &sem);
4006         if (r) {
4007                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4008                 return ERR_PTR(r);
4009         }
4010
4011         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4012         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
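        /*
         * Each loop iteration below emits one 7-dword DMA_DATA packet
         * covering at most 0x1fffff bytes, hence the num_loops * 7 worst
         * case in the ring_lock below (the extra 18 dwords presumably
         * cover the semaphore sync and fence emitted around the copy).
         * Example: copying 2048 4 KB GPU pages (8 MB) needs
         * DIV_ROUND_UP(0x800000, 0x1fffff) = 5 packets.
         */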
4013         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4014         if (r) {
4015                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4016                 radeon_semaphore_free(rdev, &sem, NULL);
4017                 return ERR_PTR(r);
4018         }
4019
4020         radeon_semaphore_sync_resv(rdev, sem, resv, false);
4021         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
4022
4023         for (i = 0; i < num_loops; i++) {
4024                 cur_size_in_bytes = size_in_bytes;
4025                 if (cur_size_in_bytes > 0x1fffff)
4026                         cur_size_in_bytes = 0x1fffff;
4027                 size_in_bytes -= cur_size_in_bytes;
4028                 control = 0;
4029                 if (size_in_bytes == 0)
4030                         control |= PACKET3_DMA_DATA_CP_SYNC;
4031                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4032                 radeon_ring_write(ring, control);
4033                 radeon_ring_write(ring, lower_32_bits(src_offset));
4034                 radeon_ring_write(ring, upper_32_bits(src_offset));
4035                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4036                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4037                 radeon_ring_write(ring, cur_size_in_bytes);
4038                 src_offset += cur_size_in_bytes;
4039                 dst_offset += cur_size_in_bytes;
4040         }
4041
4042         r = radeon_fence_emit(rdev, &fence, ring->idx);
4043         if (r) {
4044                 radeon_ring_unlock_undo(rdev, ring);
4045                 radeon_semaphore_free(rdev, &sem, NULL);
4046                 return ERR_PTR(r);
4047         }
4048
4049         radeon_ring_unlock_commit(rdev, ring, false);
4050         radeon_semaphore_free(rdev, &sem, fence);
4051
4052         return fence;
4053 }
4054
4055 /*
4056  * IB stuff
4057  */
4058 /**
4059  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4060  *
4061  * @rdev: radeon_device pointer
4062  * @ib: radeon indirect buffer object
4063  *
4064  * Emits a DE (drawing engine) or CE (constant engine) IB
4065  * on the gfx ring.  IBs are usually generated by userspace
4066  * acceleration drivers and submitted to the kernel for
4067  * scheduling on the ring.  This function schedules the IB
4068  * on the gfx ring for execution by the GPU.
4069  */
4070 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4071 {
4072         struct radeon_ring *ring = &rdev->ring[ib->ring];
4073         u32 header, control = INDIRECT_BUFFER_VALID;
4074
4075         if (ib->is_const_ib) {
4076                 /* set switch buffer packet before const IB */
4077                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4078                 radeon_ring_write(ring, 0);
4079
4080                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4081         } else {
4082                 u32 next_rptr;
4083                 if (ring->rptr_save_reg) {
4084                         next_rptr = ring->wptr + 3 + 4;
4085                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4086                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4087                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4088                         radeon_ring_write(ring, next_rptr);
4089                 } else if (rdev->wb.enabled) {
4090                         next_rptr = ring->wptr + 5 + 4;
4091                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4092                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4093                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4094                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4095                         radeon_ring_write(ring, next_rptr);
4096                 }
4097
4098                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4099         }
4100
4101         control |= ib->length_dw |
4102                 (ib->vm ? (ib->vm->id << 24) : 0);
4103
4104         radeon_ring_write(ring, header);
4105         radeon_ring_write(ring,
4106 #ifdef __BIG_ENDIAN
4107                           (2 << 0) |
4108 #endif
4109                           (ib->gpu_addr & 0xFFFFFFFC));
4110         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4111         radeon_ring_write(ring, control);
4112 }
4113
4114 /**
4115  * cik_ib_test - basic gfx ring IB test
4116  *
4117  * @rdev: radeon_device pointer
4118  * @ring: radeon_ring structure holding ring information
4119  *
4120  * Allocate an IB and execute it on the gfx ring (CIK).
4121  * Provides a basic gfx ring test to verify that IBs are working.
4122  * Returns 0 on success, error on failure.
4123  */
4124 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4125 {
4126         struct radeon_ib ib;
4127         uint32_t scratch;
4128         uint32_t tmp = 0;
4129         unsigned i;
4130         int r;
4131
4132         r = radeon_scratch_get(rdev, &scratch);
4133         if (r) {
4134                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4135                 return r;
4136         }
4137         WREG32(scratch, 0xCAFEDEAD);
4138         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4139         if (r) {
4140                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4141                 radeon_scratch_free(rdev, scratch);
4142                 return r;
4143         }
4144         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4145         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4146         ib.ptr[2] = 0xDEADBEEF;
4147         ib.length_dw = 3;
4148         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4149         if (r) {
4150                 radeon_scratch_free(rdev, scratch);
4151                 radeon_ib_free(rdev, &ib);
4152                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4153                 return r;
4154         }
4155         r = radeon_fence_wait(ib.fence, false);
4156         if (r) {
4157                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4158                 radeon_scratch_free(rdev, scratch);
4159                 radeon_ib_free(rdev, &ib);
4160                 return r;
4161         }
4162         for (i = 0; i < rdev->usec_timeout; i++) {
4163                 tmp = RREG32(scratch);
4164                 if (tmp == 0xDEADBEEF)
4165                         break;
4166                 DRM_UDELAY(1);
4167         }
4168         if (i < rdev->usec_timeout) {
4169                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4170         } else {
4171                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4172                           scratch, tmp);
4173                 r = -EINVAL;
4174         }
4175         radeon_scratch_free(rdev, scratch);
4176         radeon_ib_free(rdev, &ib);
4177         return r;
4178 }
4179
4180 /*
4181  * CP.
4182  * On CIK, gfx and compute now have independent command processors.
4183  *
4184  * GFX
4185  * Gfx consists of a single ring and can process both gfx jobs and
4186  * compute jobs.  The gfx CP consists of three microengines (ME):
4187  * PFP - Pre-Fetch Parser
4188  * ME - Micro Engine
4189  * CE - Constant Engine
4190  * The PFP and ME make up what is considered the Drawing Engine (DE).
4191  * The CE is an asynchronous engine used for updating buffer descriptors
4192  * used by the DE so that they can be loaded into cache in parallel
4193  * while the DE is processing state update packets.
4194  *
4195  * Compute
4196  * The compute CP consists of two microengines (ME):
4197  * MEC1 - Compute MicroEngine 1
4198  * MEC2 - Compute MicroEngine 2
4199  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4200  * The queues are exposed to userspace and are programmed directly
4201  * by the compute runtime.
4202  */
4203 /**
4204  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4205  *
4206  * @rdev: radeon_device pointer
4207  * @enable: enable or disable the MEs
4208  *
4209  * Halts or unhalts the gfx MEs.
4210  */
4211 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4212 {
4213         if (enable)
4214                 WREG32(CP_ME_CNTL, 0);
4215         else {
4216                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4217                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4218                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4219                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4220         }
4221         udelay(50);
4222 }
4223
4224 /**
4225  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4226  *
4227  * @rdev: radeon_device pointer
4228  *
4229  * Loads the gfx PFP, ME, and CE ucode.
4230  * Returns 0 for success, -EINVAL if the ucode is not available.
4231  */
4232 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4233 {
4234         int i;
4235
4236         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4237                 return -EINVAL;
4238
4239         cik_cp_gfx_enable(rdev, false);
4240
4241         if (rdev->new_fw) {
4242                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4243                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4244                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4245                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4246                 const struct gfx_firmware_header_v1_0 *me_hdr =
4247                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4248                 const __le32 *fw_data;
4249                 u32 fw_size;
4250
4251                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4252                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4253                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4254
4255                 /* PFP */
4256                 fw_data = (const __le32 *)
4257                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4258                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4259                 WREG32(CP_PFP_UCODE_ADDR, 0);
4260                 for (i = 0; i < fw_size; i++)
4261                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4262                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4263
4264                 /* CE */
4265                 fw_data = (const __le32 *)
4266                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4267                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4268                 WREG32(CP_CE_UCODE_ADDR, 0);
4269                 for (i = 0; i < fw_size; i++)
4270                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4271                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4272
4273                 /* ME */
4274                 fw_data = (const __le32 *)
4275                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4276                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4277                 WREG32(CP_ME_RAM_WADDR, 0);
4278                 for (i = 0; i < fw_size; i++)
4279                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4280                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4281                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4282         } else {
4283                 const __be32 *fw_data;
4284
4285                 /* PFP */
4286                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4287                 WREG32(CP_PFP_UCODE_ADDR, 0);
4288                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4289                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4290                 WREG32(CP_PFP_UCODE_ADDR, 0);
4291
4292                 /* CE */
4293                 fw_data = (const __be32 *)rdev->ce_fw->data;
4294                 WREG32(CP_CE_UCODE_ADDR, 0);
4295                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4296                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4297                 WREG32(CP_CE_UCODE_ADDR, 0);
4298
4299                 /* ME */
4300                 fw_data = (const __be32 *)rdev->me_fw->data;
4301                 WREG32(CP_ME_RAM_WADDR, 0);
4302                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4303                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4304                 WREG32(CP_ME_RAM_WADDR, 0);
4305         }
4306
4307         return 0;
4308 }
4309
4310 /**
4311  * cik_cp_gfx_start - start the gfx ring
4312  *
4313  * @rdev: radeon_device pointer
4314  *
4315  * Enables the ring and loads the clear state context and other
4316  * packets required to init the ring.
4317  * Returns 0 for success, error for failure.
4318  */
4319 static int cik_cp_gfx_start(struct radeon_device *rdev)
4320 {
4321         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4322         int r, i;
4323
4324         /* init the CP */
4325         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4326         WREG32(CP_ENDIAN_SWAP, 0);
4327         WREG32(CP_DEVICE_ID, 1);
4328
4329         cik_cp_gfx_enable(rdev, true);
4330
4331         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4332         if (r) {
4333                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4334                 return r;
4335         }
4336
4337         /* init the CE partitions.  CE only used for gfx on CIK */
4338         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4339         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4340         radeon_ring_write(ring, 0xc000);
4341         radeon_ring_write(ring, 0xc000);
4342
4343         /* setup clear context state */
4344         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4345         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4346
4347         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4348         radeon_ring_write(ring, 0x80000000);
4349         radeon_ring_write(ring, 0x80000000);
4350
4351         for (i = 0; i < cik_default_size; i++)
4352                 radeon_ring_write(ring, cik_default_state[i]);
4353
4354         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4355         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4356
4357         /* set clear context state */
4358         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4359         radeon_ring_write(ring, 0);
4360
4361         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4362         radeon_ring_write(ring, 0x00000316);
4363         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4364         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4365
4366         radeon_ring_unlock_commit(rdev, ring, false);
4367
4368         return 0;
4369 }
4370
4371 /**
4372  * cik_cp_gfx_fini - stop the gfx ring
4373  *
4374  * @rdev: radeon_device pointer
4375  *
4376  * Stop the gfx ring and tear down the driver ring
4377  * info.
4378  */
4379 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4380 {
4381         cik_cp_gfx_enable(rdev, false);
4382         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4383 }
4384
4385 /**
4386  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4387  *
4388  * @rdev: radeon_device pointer
4389  *
4390  * Program the location and size of the gfx ring buffer
4391  * and test it to make sure it's working.
4392  * Returns 0 for success, error for failure.
4393  */
4394 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4395 {
4396         struct radeon_ring *ring;
4397         u32 tmp;
4398         u32 rb_bufsz;
4399         u64 rb_addr;
4400         int r;
4401
4402         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4403         if (rdev->family != CHIP_HAWAII)
4404                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4405
4406         /* Set the write pointer delay */
4407         WREG32(CP_RB_WPTR_DELAY, 0);
4408
4409         /* set the RB to use vmid 0 */
4410         WREG32(CP_RB_VMID, 0);
4411
4412         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4413
4414         /* ring 0 - compute and gfx */
4415         /* Set ring buffer size */
4416         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4417         rb_bufsz = order_base_2(ring->ring_size / 8);
4418         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
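        /*
         * Example: a 1 MB ring gives rb_bufsz = order_base_2(1048576 / 8)
         * = 17 in the low bits, while the value shifted in at bit 8
         * (presumably the rptr write-back block size) is
         * order_base_2(4096 / 8) = 9 for the usual 4 KB GPU page size.
         */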
4419 #ifdef __BIG_ENDIAN
4420         tmp |= BUF_SWAP_32BIT;
4421 #endif
4422         WREG32(CP_RB0_CNTL, tmp);
4423
4424         /* Initialize the ring buffer's read and write pointers */
4425         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4426         ring->wptr = 0;
4427         WREG32(CP_RB0_WPTR, ring->wptr);
4428
4429         /* set the wb address whether it's enabled or not */
4430         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4431         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4432
4433         /* scratch register shadowing is no longer supported */
4434         WREG32(SCRATCH_UMSK, 0);
4435
4436         if (!rdev->wb.enabled)
4437                 tmp |= RB_NO_UPDATE;
4438
4439         mdelay(1);
4440         WREG32(CP_RB0_CNTL, tmp);
4441
4442         rb_addr = ring->gpu_addr >> 8;
4443         WREG32(CP_RB0_BASE, rb_addr);
4444         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4445
4446         /* start the ring */
4447         cik_cp_gfx_start(rdev);
4448         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4449         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4450         if (r) {
4451                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4452                 return r;
4453         }
4454
4455         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4456                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4457
4458         return 0;
4459 }
4460
4461 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4462                      struct radeon_ring *ring)
4463 {
4464         u32 rptr;
4465
4466         if (rdev->wb.enabled)
4467                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4468         else
4469                 rptr = RREG32(CP_RB0_RPTR);
4470
4471         return rptr;
4472 }
4473
4474 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4475                      struct radeon_ring *ring)
4476 {
4477         u32 wptr;
4478
4479         wptr = RREG32(CP_RB0_WPTR);
4480
4481         return wptr;
4482 }
4483
4484 void cik_gfx_set_wptr(struct radeon_device *rdev,
4485                       struct radeon_ring *ring)
4486 {
4487         WREG32(CP_RB0_WPTR, ring->wptr);
4488         (void)RREG32(CP_RB0_WPTR);
4489 }
4490
4491 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4492                          struct radeon_ring *ring)
4493 {
4494         u32 rptr;
4495
4496         if (rdev->wb.enabled) {
4497                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4498         } else {
4499                 mutex_lock(&rdev->srbm_mutex);
4500                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4501                 rptr = RREG32(CP_HQD_PQ_RPTR);
4502                 cik_srbm_select(rdev, 0, 0, 0, 0);
4503                 mutex_unlock(&rdev->srbm_mutex);
4504         }
4505
4506         return rptr;
4507 }
4508
4509 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4510                          struct radeon_ring *ring)
4511 {
4512         u32 wptr;
4513
4514         if (rdev->wb.enabled) {
4515                 /* XXX check if swapping is necessary on BE */
4516                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4517         } else {
4518                 mutex_lock(&rdev->srbm_mutex);
4519                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4520                 wptr = RREG32(CP_HQD_PQ_WPTR);
4521                 cik_srbm_select(rdev, 0, 0, 0, 0);
4522                 mutex_unlock(&rdev->srbm_mutex);
4523         }
4524
4525         return wptr;
4526 }
4527
4528 void cik_compute_set_wptr(struct radeon_device *rdev,
4529                           struct radeon_ring *ring)
4530 {
4531         /* XXX check if swapping is necessary on BE */
4532         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4533         WDOORBELL32(ring->doorbell_index, ring->wptr);
4534 }
4535
4536 /**
4537  * cik_cp_compute_enable - enable/disable the compute CP MEs
4538  *
4539  * @rdev: radeon_device pointer
4540  * @enable: enable or disable the MEs
4541  *
4542  * Halts or unhalts the compute MEs.
4543  */
4544 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4545 {
4546         if (enable)
4547                 WREG32(CP_MEC_CNTL, 0);
4548         else {
4549                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4550                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4551                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4552         }
4553         udelay(50);
4554 }
4555
4556 /**
4557  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4558  *
4559  * @rdev: radeon_device pointer
4560  *
4561  * Loads the compute MEC1&2 ucode.
4562  * Returns 0 for success, -EINVAL if the ucode is not available.
4563  */
4564 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4565 {
4566         int i;
4567
4568         if (!rdev->mec_fw)
4569                 return -EINVAL;
4570
4571         cik_cp_compute_enable(rdev, false);
4572
4573         if (rdev->new_fw) {
4574                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4575                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4576                 const __le32 *fw_data;
4577                 u32 fw_size;
4578
4579                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4580
4581                 /* MEC1 */
4582                 fw_data = (const __le32 *)
4583                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4584                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4585                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4586                 for (i = 0; i < fw_size; i++)
4587                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4588                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4589
4590                 /* MEC2 */
4591                 if (rdev->family == CHIP_KAVERI) {
4592                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4593                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4594
4595                         fw_data = (const __le32 *)
4596                                 (rdev->mec2_fw->data +
4597                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4598                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4599                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4600                         for (i = 0; i < fw_size; i++)
4601                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4602                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4603                 }
4604         } else {
4605                 const __be32 *fw_data;
4606
4607                 /* MEC1 */
4608                 fw_data = (const __be32 *)rdev->mec_fw->data;
4609                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4610                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4611                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4612                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4613
4614                 if (rdev->family == CHIP_KAVERI) {
4615                         /* MEC2 */
4616                         fw_data = (const __be32 *)rdev->mec_fw->data;
4617                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4618                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4619                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4620                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4621                 }
4622         }
4623
4624         return 0;
4625 }
4626
4627 /**
4628  * cik_cp_compute_start - start the compute queues
4629  *
4630  * @rdev: radeon_device pointer
4631  *
4632  * Enable the compute queues.
4633  * Returns 0 for success, error for failure.
4634  */
4635 static int cik_cp_compute_start(struct radeon_device *rdev)
4636 {
4637         cik_cp_compute_enable(rdev, true);
4638
4639         return 0;
4640 }
4641
4642 /**
4643  * cik_cp_compute_fini - stop the compute queues
4644  *
4645  * @rdev: radeon_device pointer
4646  *
4647  * Stop the compute queues and tear down the driver queue
4648  * info.
4649  */
4650 static void cik_cp_compute_fini(struct radeon_device *rdev)
4651 {
4652         int i, idx, r;
4653
4654         cik_cp_compute_enable(rdev, false);
4655
4656         for (i = 0; i < 2; i++) {
4657                 if (i == 0)
4658                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4659                 else
4660                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4661
4662                 if (rdev->ring[idx].mqd_obj) {
4663                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4664                         if (unlikely(r != 0))
4665                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4666
4667                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4668                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4669
4670                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4671                         rdev->ring[idx].mqd_obj = NULL;
4672                 }
4673         }
4674 }
4675
4676 static void cik_mec_fini(struct radeon_device *rdev)
4677 {
4678         int r;
4679
4680         if (rdev->mec.hpd_eop_obj) {
4681                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4682                 if (unlikely(r != 0))
4683                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4684                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4685                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4686
4687                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4688                 rdev->mec.hpd_eop_obj = NULL;
4689         }
4690 }
4691
4692 #define MEC_HPD_SIZE 2048
4693
4694 static int cik_mec_init(struct radeon_device *rdev)
4695 {
4696         int r;
4697         u32 *hpd;
4698
4699         /*
4700          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4701          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4702          * Nonetheless, we assign only 1 pipe because all other pipes will
4703          * be handled by KFD
4704          */
4705         rdev->mec.num_mec = 1;
4706         rdev->mec.num_pipe = 1;
4707         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4708
4709         if (rdev->mec.hpd_eop_obj == NULL) {
4710                 r = radeon_bo_create(rdev,
4711                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4712                                      PAGE_SIZE, true,
4713                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4714                                      &rdev->mec.hpd_eop_obj);
4715                 if (r) {
4716                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4717                         return r;
4718                 }
4719         }
4720
4721         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4722         if (unlikely(r != 0)) {
4723                 cik_mec_fini(rdev);
4724                 return r;
4725         }
4726         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4727                           &rdev->mec.hpd_eop_gpu_addr);
4728         if (r) {
4729                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4730                 cik_mec_fini(rdev);
4731                 return r;
4732         }
4733         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4734         if (r) {
4735                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4736                 cik_mec_fini(rdev);
4737                 return r;
4738         }
4739
4740         /* clear memory.  Not sure if this is required or not */
4741         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4742
4743         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4744         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4745
4746         return 0;
4747 }
4748
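/*
 * Snapshot of the per-queue CP_MQD_ and CP_HQD_ registers; a copy of this
 * block is embedded in struct bonaire_mqd below as queue_state.
 */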
4749 struct hqd_registers
4750 {
4751         u32 cp_mqd_base_addr;
4752         u32 cp_mqd_base_addr_hi;
4753         u32 cp_hqd_active;
4754         u32 cp_hqd_vmid;
4755         u32 cp_hqd_persistent_state;
4756         u32 cp_hqd_pipe_priority;
4757         u32 cp_hqd_queue_priority;
4758         u32 cp_hqd_quantum;
4759         u32 cp_hqd_pq_base;
4760         u32 cp_hqd_pq_base_hi;
4761         u32 cp_hqd_pq_rptr;
4762         u32 cp_hqd_pq_rptr_report_addr;
4763         u32 cp_hqd_pq_rptr_report_addr_hi;
4764         u32 cp_hqd_pq_wptr_poll_addr;
4765         u32 cp_hqd_pq_wptr_poll_addr_hi;
4766         u32 cp_hqd_pq_doorbell_control;
4767         u32 cp_hqd_pq_wptr;
4768         u32 cp_hqd_pq_control;
4769         u32 cp_hqd_ib_base_addr;
4770         u32 cp_hqd_ib_base_addr_hi;
4771         u32 cp_hqd_ib_rptr;
4772         u32 cp_hqd_ib_control;
4773         u32 cp_hqd_iq_timer;
4774         u32 cp_hqd_iq_rptr;
4775         u32 cp_hqd_dequeue_request;
4776         u32 cp_hqd_dma_offload;
4777         u32 cp_hqd_sema_cmd;
4778         u32 cp_hqd_msg_type;
4779         u32 cp_hqd_atomic0_preop_lo;
4780         u32 cp_hqd_atomic0_preop_hi;
4781         u32 cp_hqd_atomic1_preop_lo;
4782         u32 cp_hqd_atomic1_preop_hi;
4783         u32 cp_hqd_hq_scheduler0;
4784         u32 cp_hqd_hq_scheduler1;
4785         u32 cp_mqd_control;
4786 };
4787
4788 struct bonaire_mqd
4789 {
4790         u32 header;
4791         u32 dispatch_initiator;
4792         u32 dimensions[3];
4793         u32 start_idx[3];
4794         u32 num_threads[3];
4795         u32 pipeline_stat_enable;
4796         u32 perf_counter_enable;
4797         u32 pgm[2];
4798         u32 tba[2];
4799         u32 tma[2];
4800         u32 pgm_rsrc[2];
4801         u32 vmid;
4802         u32 resource_limits;
4803         u32 static_thread_mgmt01[2];
4804         u32 tmp_ring_size;
4805         u32 static_thread_mgmt23[2];
4806         u32 restart[3];
4807         u32 thread_trace_enable;
4808         u32 reserved1;
4809         u32 user_data[16];
4810         u32 vgtcs_invoke_count[2];
4811         struct hqd_registers queue_state;
4812         u32 dequeue_cntr;
4813         u32 interrupt_queue[64];
4814 };
4815
4816 /**
4817  * cik_cp_compute_resume - setup the compute queue registers
4818  *
4819  * @rdev: radeon_device pointer
4820  *
4821  * Program the compute queues and test them to make sure they
4822  * are working.
4823  * Returns 0 for success, error for failure.
4824  */
4825 static int cik_cp_compute_resume(struct radeon_device *rdev)
4826 {
4827         int r, i, j, idx;
4828         u32 tmp;
4829         bool use_doorbell = true;
4830         u64 hqd_gpu_addr;
4831         u64 mqd_gpu_addr;
4832         u64 eop_gpu_addr;
4833         u64 wb_gpu_addr;
4834         u32 *buf;
4835         struct bonaire_mqd *mqd;
4836
4837         r = cik_cp_compute_start(rdev);
4838         if (r)
4839                 return r;
4840
4841         /* fix up chicken bits */
4842         tmp = RREG32(CP_CPF_DEBUG);
4843         tmp |= (1 << 23);
4844         WREG32(CP_CPF_DEBUG, tmp);
4845
4846         /* init the pipes */
4847         mutex_lock(&rdev->srbm_mutex);
4848
4849         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4850
4851         cik_srbm_select(rdev, 0, 0, 0, 0);
4852
4853         /* write the EOP addr */
4854         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4855         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
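        /* the EOP base address is programmed in 256-byte units (address >> 8) */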
4856
4857         /* set the VMID assigned */
4858         WREG32(CP_HPD_EOP_VMID, 0);
4859
4860         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4861         tmp = RREG32(CP_HPD_EOP_CONTROL);
4862         tmp &= ~EOP_SIZE_MASK;
4863         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4864         WREG32(CP_HPD_EOP_CONTROL, tmp);
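        /*
         * e.g. MEC_HPD_SIZE = 2048 bytes: order_base_2(2048 / 8) = 8, which
         * the hardware interprets as 2^(8+1) = 512 dwords = 2048 bytes
         */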
4865
4866         mutex_unlock(&rdev->srbm_mutex);
4867
4868         /* init the queues.  Just two for now. */
4869         for (i = 0; i < 2; i++) {
4870                 if (i == 0)
4871                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4872                 else
4873                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4874
4875                 if (rdev->ring[idx].mqd_obj == NULL) {
4876                         r = radeon_bo_create(rdev,
4877                                              sizeof(struct bonaire_mqd),
4878                                              PAGE_SIZE, true,
4879                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4880                                              NULL, &rdev->ring[idx].mqd_obj);
4881                         if (r) {
4882                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4883                                 return r;
4884                         }
4885                 }
4886
4887                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4888                 if (unlikely(r != 0)) {
4889                         cik_cp_compute_fini(rdev);
4890                         return r;
4891                 }
4892                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4893                                   &mqd_gpu_addr);
4894                 if (r) {
4895                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4896                         cik_cp_compute_fini(rdev);
4897                         return r;
4898                 }
4899                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4900                 if (r) {
4901                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4902                         cik_cp_compute_fini(rdev);
4903                         return r;
4904                 }
4905
4906                 /* init the mqd struct */
4907                 memset(buf, 0, sizeof(struct bonaire_mqd));
4908
4909                 mqd = (struct bonaire_mqd *)buf;
4910                 mqd->header = 0xC0310800;
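                /* all-ones masks: presumably enable every CU on every SE for this queue */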
4911                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4912                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4913                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4914                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4915
4916                 mutex_lock(&rdev->srbm_mutex);
4917                 cik_srbm_select(rdev, rdev->ring[idx].me,
4918                                 rdev->ring[idx].pipe,
4919                                 rdev->ring[idx].queue, 0);
4920
4921                 /* disable wptr polling */
4922                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4923                 tmp &= ~WPTR_POLL_EN;
4924                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4925
4926                 /* enable doorbell? */
4927                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4928                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4929                 if (use_doorbell)
4930                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4931                 else
4932                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4933                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4934                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4935
4936                 /* disable the queue if it's active */
4937                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4938                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4939                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4940                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4941                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4942                         for (j = 0; j < rdev->usec_timeout; j++) {
4943                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4944                                         break;
4945                                 udelay(1);
4946                         }
4947                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4948                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4949                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4950                 }
4951
4952                 /* set the pointer to the MQD */
4953                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4954                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4955                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4956                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4957                 /* set MQD vmid to 0 */
4958                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4959                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4960                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4961
4962                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4963                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4964                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4965                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4966                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4967                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4968
4969                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4970                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4971                 mqd->queue_state.cp_hqd_pq_control &=
4972                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4973
4974                 mqd->queue_state.cp_hqd_pq_control |=
4975                         order_base_2(rdev->ring[idx].ring_size / 8);
4976                 mqd->queue_state.cp_hqd_pq_control |=
4977                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
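                /*
                 * e.g. with RADEON_GPU_PAGE_SIZE = 4096 the rptr block size
                 * field is order_base_2(4096 / 8) = 9; the queue size field
                 * likewise encodes the ring size in 8-byte units
                 */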
4978 #ifdef __BIG_ENDIAN
4979                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4980 #endif
4981                 mqd->queue_state.cp_hqd_pq_control &=
4982                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4983                 mqd->queue_state.cp_hqd_pq_control |=
4984                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4985                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4986
4987                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4988                 if (i == 0)
4989                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4990                 else
4991                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4992                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4993                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4994                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4995                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4996                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4997
4998                 /* set the wb address whether it's enabled or not */
4999                 if (i == 0)
5000                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5001                 else
5002                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5003                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5004                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5005                         upper_32_bits(wb_gpu_addr) & 0xffff;
5006                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5007                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5008                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5009                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5010
5011                 /* enable the doorbell if requested */
5012                 if (use_doorbell) {
5013                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5014                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5015                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5016                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5017                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5018                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5019                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5020                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5021
5022                 } else {
5023                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5024                 }
5025                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5026                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5027
5028                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5029                 rdev->ring[idx].wptr = 0;
5030                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5031                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5032                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5033
5034                 /* set the vmid for the queue */
5035                 mqd->queue_state.cp_hqd_vmid = 0;
5036                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5037
5038                 /* activate the queue */
5039                 mqd->queue_state.cp_hqd_active = 1;
5040                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5041
5042                 cik_srbm_select(rdev, 0, 0, 0, 0);
5043                 mutex_unlock(&rdev->srbm_mutex);
5044
5045                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5046                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5047
5048                 rdev->ring[idx].ready = true;
5049                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5050                 if (r)
5051                         rdev->ring[idx].ready = false;
5052         }
5053
5054         return 0;
5055 }
5056
5057 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5058 {
5059         cik_cp_gfx_enable(rdev, enable);
5060         cik_cp_compute_enable(rdev, enable);
5061 }
5062
5063 static int cik_cp_load_microcode(struct radeon_device *rdev)
5064 {
5065         int r;
5066
5067         r = cik_cp_gfx_load_microcode(rdev);
5068         if (r)
5069                 return r;
5070         r = cik_cp_compute_load_microcode(rdev);
5071         if (r)
5072                 return r;
5073
5074         return 0;
5075 }
5076
5077 static void cik_cp_fini(struct radeon_device *rdev)
5078 {
5079         cik_cp_gfx_fini(rdev);
5080         cik_cp_compute_fini(rdev);
5081 }
5082
5083 static int cik_cp_resume(struct radeon_device *rdev)
5084 {
5085         int r;
5086
5087         cik_enable_gui_idle_interrupt(rdev, false);
5088
5089         r = cik_cp_load_microcode(rdev);
5090         if (r)
5091                 return r;
5092
5093         r = cik_cp_gfx_resume(rdev);
5094         if (r)
5095                 return r;
5096         r = cik_cp_compute_resume(rdev);
5097         if (r)
5098                 return r;
5099
5100         cik_enable_gui_idle_interrupt(rdev, true);
5101
5102         return 0;
5103 }
5104
5105 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5106 {
5107         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5108                 RREG32(GRBM_STATUS));
5109         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5110                 RREG32(GRBM_STATUS2));
5111         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5112                 RREG32(GRBM_STATUS_SE0));
5113         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5114                 RREG32(GRBM_STATUS_SE1));
5115         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5116                 RREG32(GRBM_STATUS_SE2));
5117         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5118                 RREG32(GRBM_STATUS_SE3));
5119         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5120                 RREG32(SRBM_STATUS));
5121         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5122                 RREG32(SRBM_STATUS2));
5123         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5124                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5125         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5126                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5127         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5128         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5129                  RREG32(CP_STALLED_STAT1));
5130         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5131                  RREG32(CP_STALLED_STAT2));
5132         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5133                  RREG32(CP_STALLED_STAT3));
5134         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5135                  RREG32(CP_CPF_BUSY_STAT));
5136         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5137                  RREG32(CP_CPF_STALLED_STAT1));
5138         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5139         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5140         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5141                  RREG32(CP_CPC_STALLED_STAT1));
5142         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5143 }
5144
5145 /**
5146  * cik_gpu_check_soft_reset - check which blocks are busy
5147  *
5148  * @rdev: radeon_device pointer
5149  *
5150  * Check which blocks are busy and return the relevant reset
5151  * mask to be used by cik_gpu_soft_reset().
5152  * Returns a mask of the blocks to be reset.
5153  */
5154 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5155 {
5156         u32 reset_mask = 0;
5157         u32 tmp;
5158
5159         /* GRBM_STATUS */
5160         tmp = RREG32(GRBM_STATUS);
5161         if (tmp & (PA_BUSY | SC_BUSY |
5162                    BCI_BUSY | SX_BUSY |
5163                    TA_BUSY | VGT_BUSY |
5164                    DB_BUSY | CB_BUSY |
5165                    GDS_BUSY | SPI_BUSY |
5166                    IA_BUSY | IA_BUSY_NO_DMA))
5167                 reset_mask |= RADEON_RESET_GFX;
5168
5169         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5170                 reset_mask |= RADEON_RESET_CP;
5171
5172         /* GRBM_STATUS2 */
5173         tmp = RREG32(GRBM_STATUS2);
5174         if (tmp & RLC_BUSY)
5175                 reset_mask |= RADEON_RESET_RLC;
5176
5177         /* SDMA0_STATUS_REG */
5178         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5179         if (!(tmp & SDMA_IDLE))
5180                 reset_mask |= RADEON_RESET_DMA;
5181
5182         /* SDMA1_STATUS_REG */
5183         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5184         if (!(tmp & SDMA_IDLE))
5185                 reset_mask |= RADEON_RESET_DMA1;
5186
5187         /* SRBM_STATUS2 */
5188         tmp = RREG32(SRBM_STATUS2);
5189         if (tmp & SDMA_BUSY)
5190                 reset_mask |= RADEON_RESET_DMA;
5191
5192         if (tmp & SDMA1_BUSY)
5193                 reset_mask |= RADEON_RESET_DMA1;
5194
5195         /* SRBM_STATUS */
5196         tmp = RREG32(SRBM_STATUS);
5197
5198         if (tmp & IH_BUSY)
5199                 reset_mask |= RADEON_RESET_IH;
5200
5201         if (tmp & SEM_BUSY)
5202                 reset_mask |= RADEON_RESET_SEM;
5203
5204         if (tmp & GRBM_RQ_PENDING)
5205                 reset_mask |= RADEON_RESET_GRBM;
5206
5207         if (tmp & VMC_BUSY)
5208                 reset_mask |= RADEON_RESET_VMC;
5209
5210         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5211                    MCC_BUSY | MCD_BUSY))
5212                 reset_mask |= RADEON_RESET_MC;
5213
5214         if (evergreen_is_display_hung(rdev))
5215                 reset_mask |= RADEON_RESET_DISPLAY;
5216
5217         /* Skip MC reset as it's most likely not hung, just busy */
5218         if (reset_mask & RADEON_RESET_MC) {
5219                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5220                 reset_mask &= ~RADEON_RESET_MC;
5221         }
5222
5223         return reset_mask;
5224 }
5225
5226 /**
5227  * cik_gpu_soft_reset - soft reset GPU
5228  *
5229  * @rdev: radeon_device pointer
5230  * @reset_mask: mask of which blocks to reset
5231  *
5232  * Soft reset the blocks specified in @reset_mask.
5233  */
5234 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5235 {
5236         struct evergreen_mc_save save;
5237         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5238         u32 tmp;
5239
5240         if (reset_mask == 0)
5241                 return;
5242
5243         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5244
5245         cik_print_gpu_status_regs(rdev);
5246         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5247                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5248         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5249                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5250
5251         /* disable CG/PG */
5252         cik_fini_pg(rdev);
5253         cik_fini_cg(rdev);
5254
5255         /* stop the rlc */
5256         cik_rlc_stop(rdev);
5257
5258         /* Disable GFX parsing/prefetching */
5259         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5260
5261         /* Disable MEC parsing/prefetching */
5262         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5263
5264         if (reset_mask & RADEON_RESET_DMA) {
5265                 /* sdma0 */
5266                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5267                 tmp |= SDMA_HALT;
5268                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5269         }
5270         if (reset_mask & RADEON_RESET_DMA1) {
5271                 /* sdma1 */
5272                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5273                 tmp |= SDMA_HALT;
5274                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5275         }
5276
5277         evergreen_mc_stop(rdev, &save);
5278         if (evergreen_mc_wait_for_idle(rdev)) {
5279                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5280         }
5281
5282         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5283                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5284
5285         if (reset_mask & RADEON_RESET_CP) {
5286                 grbm_soft_reset |= SOFT_RESET_CP;
5287
5288                 srbm_soft_reset |= SOFT_RESET_GRBM;
5289         }
5290
5291         if (reset_mask & RADEON_RESET_DMA)
5292                 srbm_soft_reset |= SOFT_RESET_SDMA;
5293
5294         if (reset_mask & RADEON_RESET_DMA1)
5295                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5296
5297         if (reset_mask & RADEON_RESET_DISPLAY)
5298                 srbm_soft_reset |= SOFT_RESET_DC;
5299
5300         if (reset_mask & RADEON_RESET_RLC)
5301                 grbm_soft_reset |= SOFT_RESET_RLC;
5302
5303         if (reset_mask & RADEON_RESET_SEM)
5304                 srbm_soft_reset |= SOFT_RESET_SEM;
5305
5306         if (reset_mask & RADEON_RESET_IH)
5307                 srbm_soft_reset |= SOFT_RESET_IH;
5308
5309         if (reset_mask & RADEON_RESET_GRBM)
5310                 srbm_soft_reset |= SOFT_RESET_GRBM;
5311
5312         if (reset_mask & RADEON_RESET_VMC)
5313                 srbm_soft_reset |= SOFT_RESET_VMC;
5314
5315         if (!(rdev->flags & RADEON_IS_IGP)) {
5316                 if (reset_mask & RADEON_RESET_MC)
5317                         srbm_soft_reset |= SOFT_RESET_MC;
5318         }
5319
5320         if (grbm_soft_reset) {
5321                 tmp = RREG32(GRBM_SOFT_RESET);
5322                 tmp |= grbm_soft_reset;
5323                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5324                 WREG32(GRBM_SOFT_RESET, tmp);
5325                 tmp = RREG32(GRBM_SOFT_RESET);
5326
5327                 udelay(50);
5328
5329                 tmp &= ~grbm_soft_reset;
5330                 WREG32(GRBM_SOFT_RESET, tmp);
5331                 tmp = RREG32(GRBM_SOFT_RESET);
5332         }
5333
5334         if (srbm_soft_reset) {
5335                 tmp = RREG32(SRBM_SOFT_RESET);
5336                 tmp |= srbm_soft_reset;
5337                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5338                 WREG32(SRBM_SOFT_RESET, tmp);
5339                 tmp = RREG32(SRBM_SOFT_RESET);
5340
5341                 udelay(50);
5342
5343                 tmp &= ~srbm_soft_reset;
5344                 WREG32(SRBM_SOFT_RESET, tmp);
5345                 tmp = RREG32(SRBM_SOFT_RESET);
5346         }
5347
5348         /* Wait a little for things to settle down */
5349         udelay(50);
5350
5351         evergreen_mc_resume(rdev, &save);
5352         udelay(50);
5353
5354         cik_print_gpu_status_regs(rdev);
5355 }
5356
5357 struct kv_reset_save_regs {
5358         u32 gmcon_reng_execute;
5359         u32 gmcon_misc;
5360         u32 gmcon_misc3;
5361 };
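/*
 * On Kaveri (IGP) these GMCON registers are saved before the PCI config
 * reset and restored afterwards; the fixed GMCON_PGFSM sequence in the
 * restore path appears to re-initialize the memory controller power
 * gating state machine.
 */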
5362
5363 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5364                                    struct kv_reset_save_regs *save)
5365 {
5366         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5367         save->gmcon_misc = RREG32(GMCON_MISC);
5368         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5369
5370         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5371         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5372                                                 STCTRL_STUTTER_EN));
5373 }
5374
5375 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5376                                       struct kv_reset_save_regs *save)
5377 {
5378         int i;
5379
5380         WREG32(GMCON_PGFSM_WRITE, 0);
5381         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5382
5383         for (i = 0; i < 5; i++)
5384                 WREG32(GMCON_PGFSM_WRITE, 0);
5385
5386         WREG32(GMCON_PGFSM_WRITE, 0);
5387         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5388
5389         for (i = 0; i < 5; i++)
5390                 WREG32(GMCON_PGFSM_WRITE, 0);
5391
5392         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5393         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5394
5395         for (i = 0; i < 5; i++)
5396                 WREG32(GMCON_PGFSM_WRITE, 0);
5397
5398         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5399         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5400
5401         for (i = 0; i < 5; i++)
5402                 WREG32(GMCON_PGFSM_WRITE, 0);
5403
5404         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5405         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5406
5407         for (i = 0; i < 5; i++)
5408                 WREG32(GMCON_PGFSM_WRITE, 0);
5409
5410         WREG32(GMCON_PGFSM_WRITE, 0);
5411         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5412
5413         for (i = 0; i < 5; i++)
5414                 WREG32(GMCON_PGFSM_WRITE, 0);
5415
5416         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5417         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5418
5419         for (i = 0; i < 5; i++)
5420                 WREG32(GMCON_PGFSM_WRITE, 0);
5421
5422         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5423         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5424
5425         for (i = 0; i < 5; i++)
5426                 WREG32(GMCON_PGFSM_WRITE, 0);
5427
5428         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5429         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5430
5431         for (i = 0; i < 5; i++)
5432                 WREG32(GMCON_PGFSM_WRITE, 0);
5433
5434         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5435         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5436
5437         for (i = 0; i < 5; i++)
5438                 WREG32(GMCON_PGFSM_WRITE, 0);
5439
5440         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5441         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5442
5443         WREG32(GMCON_MISC3, save->gmcon_misc3);
5444         WREG32(GMCON_MISC, save->gmcon_misc);
5445         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5446 }
5447
5448 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5449 {
5450         struct evergreen_mc_save save;
5451         struct kv_reset_save_regs kv_save = { 0 };
5452         u32 tmp, i;
5453
5454         dev_info(rdev->dev, "GPU pci config reset\n");
5455
5456         /* disable dpm? */
5457
5458         /* disable cg/pg */
5459         cik_fini_pg(rdev);
5460         cik_fini_cg(rdev);
5461
5462         /* Disable GFX parsing/prefetching */
5463         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5464
5465         /* Disable MEC parsing/prefetching */
5466         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5467
5468         /* sdma0 */
5469         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5470         tmp |= SDMA_HALT;
5471         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5472         /* sdma1 */
5473         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5474         tmp |= SDMA_HALT;
5475         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5476         /* XXX other engines? */
5477
5478         /* halt the rlc, disable cp internal ints */
5479         cik_rlc_stop(rdev);
5480
5481         udelay(50);
5482
5483         /* disable mem access */
5484         evergreen_mc_stop(rdev, &save);
5485         if (evergreen_mc_wait_for_idle(rdev)) {
5486                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5487         }
5488
5489         if (rdev->flags & RADEON_IS_IGP)
5490                 kv_save_regs_for_reset(rdev, &kv_save);
5491
5492         /* disable BM */
5493         pci_clear_master(rdev->pdev);
5494         /* reset */
5495         radeon_pci_config_reset(rdev);
5496
5497         udelay(100);
5498
5499         /* wait for asic to come out of reset */
5500         for (i = 0; i < rdev->usec_timeout; i++) {
5501                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5502                         break;
5503                 udelay(1);
5504         }
5505
5506         /* does asic init need to be run first??? */
5507         if (rdev->flags & RADEON_IS_IGP)
5508                 kv_restore_regs_for_reset(rdev, &kv_save);
5509 }
5510
5511 /**
5512  * cik_asic_reset - soft reset GPU
5513  *
5514  * @rdev: radeon_device pointer
5515  *
5516  * Look up which blocks are hung and attempt
5517  * to reset them.
5518  * Returns 0 for success.
5519  */
5520 int cik_asic_reset(struct radeon_device *rdev)
5521 {
5522         u32 reset_mask;
5523
5524         reset_mask = cik_gpu_check_soft_reset(rdev);
5525
5526         if (reset_mask)
5527                 r600_set_bios_scratch_engine_hung(rdev, true);
5528
5529         /* try soft reset */
5530         cik_gpu_soft_reset(rdev, reset_mask);
5531
5532         reset_mask = cik_gpu_check_soft_reset(rdev);
5533
5534         /* try pci config reset */
5535         if (reset_mask && radeon_hard_reset)
5536                 cik_gpu_pci_config_reset(rdev);
5537
5538         reset_mask = cik_gpu_check_soft_reset(rdev);
5539
5540         if (!reset_mask)
5541                 r600_set_bios_scratch_engine_hung(rdev, false);
5542
5543         return 0;
5544 }
5545
5546 /**
5547  * cik_gfx_is_lockup - check if the 3D engine is locked up
5548  *
5549  * @rdev: radeon_device pointer
5550  * @ring: radeon_ring structure holding ring information
5551  *
5552  * Check if the 3D engine is locked up (CIK).
5553  * Returns true if the engine is locked, false if not.
5554  */
5555 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5556 {
5557         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5558
5559         if (!(reset_mask & (RADEON_RESET_GFX |
5560                             RADEON_RESET_COMPUTE |
5561                             RADEON_RESET_CP))) {
5562                 radeon_ring_lockup_update(rdev, ring);
5563                 return false;
5564         }
5565         return radeon_ring_test_lockup(rdev, ring);
5566 }
5567
5568 /* MC */
5569 /**
5570  * cik_mc_program - program the GPU memory controller
5571  *
5572  * @rdev: radeon_device pointer
5573  *
5574  * Set the location of vram, gart, and AGP in the GPU's
5575  * physical address space (CIK).
5576  */
5577 static void cik_mc_program(struct radeon_device *rdev)
5578 {
5579         struct evergreen_mc_save save;
5580         u32 tmp;
5581         int i, j;
5582
5583         /* Initialize HDP */
5584         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5585                 WREG32((0x2c14 + j), 0x00000000);
5586                 WREG32((0x2c18 + j), 0x00000000);
5587                 WREG32((0x2c1c + j), 0x00000000);
5588                 WREG32((0x2c20 + j), 0x00000000);
5589                 WREG32((0x2c24 + j), 0x00000000);
5590         }
5591         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5592
5593         evergreen_mc_stop(rdev, &save);
5594         if (radeon_mc_wait_for_idle(rdev)) {
5595                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5596         }
5597         /* Lockout access through VGA aperture*/
5598         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5599         /* Update configuration */
5600         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5601                rdev->mc.vram_start >> 12);
5602         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5603                rdev->mc.vram_end >> 12);
5604         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5605                rdev->vram_scratch.gpu_addr >> 12);
5606         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5607         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5608         WREG32(MC_VM_FB_LOCATION, tmp);
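        /* FB location is encoded in 16 MB units: bits 31:16 hold vram_end >> 24,
         * bits 15:0 hold vram_start >> 24
         */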
5609         /* XXX double check these! */
5610         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5611         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5612         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5613         WREG32(MC_VM_AGP_BASE, 0);
5614         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5615         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5616         if (radeon_mc_wait_for_idle(rdev)) {
5617                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5618         }
5619         evergreen_mc_resume(rdev, &save);
5620         /* we need to own VRAM, so turn off the VGA renderer here
5621          * to stop it overwriting our objects */
5622         rv515_vga_render_disable(rdev);
5623 }
5624
5625 /**
5626  * cik_mc_init - initialize the memory controller driver params
5627  *
5628  * @rdev: radeon_device pointer
5629  *
5630  * Look up the amount of vram, vram width, and decide how to place
5631  * vram and gart within the GPU's physical address space (CIK).
5632  * Returns 0 for success.
5633  */
5634 static int cik_mc_init(struct radeon_device *rdev)
5635 {
5636         u32 tmp;
5637         int chansize, numchan;
5638
5639         /* Get VRAM information */
5640         rdev->mc.vram_is_ddr = true;
5641         tmp = RREG32(MC_ARB_RAMCFG);
5642         if (tmp & CHANSIZE_MASK) {
5643                 chansize = 64;
5644         } else {
5645                 chansize = 32;
5646         }
5647         tmp = RREG32(MC_SHARED_CHMAP);
5648         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5649         case 0:
5650         default:
5651                 numchan = 1;
5652                 break;
5653         case 1:
5654                 numchan = 2;
5655                 break;
5656         case 2:
5657                 numchan = 4;
5658                 break;
5659         case 3:
5660                 numchan = 8;
5661                 break;
5662         case 4:
5663                 numchan = 3;
5664                 break;
5665         case 5:
5666                 numchan = 6;
5667                 break;
5668         case 6:
5669                 numchan = 10;
5670                 break;
5671         case 7:
5672                 numchan = 12;
5673                 break;
5674         case 8:
5675                 numchan = 16;
5676                 break;
5677         }
5678         rdev->mc.vram_width = numchan * chansize;
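        /* e.g. 8 channels x a 64-bit chansize gives the 512-bit interface found on Hawaii */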
5679         /* Could aper size report 0? */
5680         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5681         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5682         /* CONFIG_MEMSIZE reports the VRAM size in MB */
5683         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5684         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5685         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5686         si_vram_gtt_location(rdev, &rdev->mc);
5687         radeon_update_bandwidth_info(rdev);
5688
5689         return 0;
5690 }
5691
5692 /*
5693  * GART
5694  * VMID 0 is the physical GPU address space as used by the kernel.
5695  * VMIDs 1-15 are used for userspace clients and are handled
5696  * by the radeon vm/hsa code.
5697  */
5698 /**
5699  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5700  *
5701  * @rdev: radeon_device pointer
5702  *
5703  * Flush the TLB for the VMID 0 page table (CIK).
5704  */
5705 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5706 {
5707         /* flush hdp cache */
5708         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5709
5710         /* bits 0-15 are the VM contexts0-15 */
5711         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5712 }
5713
5714 /**
5715  * cik_pcie_gart_enable - gart enable
5716  *
5717  * @rdev: radeon_device pointer
5718  *
5719  * This sets up the TLBs, programs the page tables for VMID0,
5720  * sets up the hw for VMIDs 1-15 which are allocated on
5721  * demand, and sets up the global locations for the LDS, GDS,
5722  * and GPUVM for FSA64 clients (CIK).
5723  * Returns 0 for success, errors for failure.
5724  */
5725 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5726 {
5727         int r, i;
5728
5729         if (rdev->gart.robj == NULL) {
5730                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5731                 return -EINVAL;
5732         }
5733         r = radeon_gart_table_vram_pin(rdev);
5734         if (r)
5735                 return r;
5736         /* Setup TLB control */
5737         WREG32(MC_VM_MX_L1_TLB_CNTL,
5738                (0xA << 7) |
5739                ENABLE_L1_TLB |
5740                ENABLE_L1_FRAGMENT_PROCESSING |
5741                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5742                ENABLE_ADVANCED_DRIVER_MODEL |
5743                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5744         /* Setup L2 cache */
5745         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5746                ENABLE_L2_FRAGMENT_PROCESSING |
5747                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5748                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5749                EFFECTIVE_L2_QUEUE_SIZE(7) |
5750                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5751         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5752         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5753                BANK_SELECT(4) |
5754                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5755         /* setup context0 */
5756         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5757         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5758         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5759         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5760                         (u32)(rdev->dummy_page.addr >> 12));
5761         WREG32(VM_CONTEXT0_CNTL2, 0);
5762         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5763                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5764
5765         WREG32(0x15D4, 0);
5766         WREG32(0x15D8, 0);
5767         WREG32(0x15DC, 0);
5768
5769         /* restore context1-15 */
5770         /* set vm size, must be a multiple of 4 */
5771         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5772         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5773         for (i = 1; i < 16; i++) {
5774                 if (i < 8)
5775                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5776                                rdev->vm_manager.saved_table_addr[i]);
5777                 else
5778                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5779                                rdev->vm_manager.saved_table_addr[i]);
5780         }
5781
5782         /* enable context1-15 */
5783         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5784                (u32)(rdev->dummy_page.addr >> 12));
5785         WREG32(VM_CONTEXT1_CNTL2, 4);
5786         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5787                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5788                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5789                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5790                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5791                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5792                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5793                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5794                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5795                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5796                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5797                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5798                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5799                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5800
5801         if (rdev->family == CHIP_KAVERI) {
5802                 u32 tmp = RREG32(CHUB_CONTROL);
5803                 tmp &= ~BYPASS_VM;
5804                 WREG32(CHUB_CONTROL, tmp);
5805         }
5806
5807         /* XXX SH_MEM regs */
5808         /* where to put LDS, scratch, GPUVM in FSA64 space */
5809         mutex_lock(&rdev->srbm_mutex);
5810         for (i = 0; i < 16; i++) {
5811                 cik_srbm_select(rdev, 0, 0, 0, i);
5812                 /* CP and shaders */
5813                 WREG32(SH_MEM_CONFIG, 0);
5814                 WREG32(SH_MEM_APE1_BASE, 1);
5815                 WREG32(SH_MEM_APE1_LIMIT, 0);
5816                 WREG32(SH_MEM_BASES, 0);
5817                 /* SDMA GFX */
5818                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5819                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5820                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5821                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5822                 /* XXX SDMA RLC - todo */
5823         }
5824         cik_srbm_select(rdev, 0, 0, 0, 0);
5825         mutex_unlock(&rdev->srbm_mutex);
5826
5827         cik_pcie_gart_tlb_flush(rdev);
5828         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5829                  (unsigned)(rdev->mc.gtt_size >> 20),
5830                  (unsigned long long)rdev->gart.table_addr);
5831         rdev->gart.ready = true;
5832         return 0;
5833 }
5834
5835 /**
5836  * cik_pcie_gart_disable - gart disable
5837  *
5838  * @rdev: radeon_device pointer
5839  *
5840  * This disables all VM page tables (CIK).
5841  */
5842 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5843 {
5844         unsigned i;
5845
5846         for (i = 1; i < 16; ++i) {
5847                 uint32_t reg;
5848                 if (i < 8)
5849                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5850                 else
5851                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5852                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5853         }
5854
5855         /* Disable all tables */
5856         WREG32(VM_CONTEXT0_CNTL, 0);
5857         WREG32(VM_CONTEXT1_CNTL, 0);
5858         /* Setup TLB control */
5859         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5860                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5861         /* Setup L2 cache */
5862         WREG32(VM_L2_CNTL,
5863                ENABLE_L2_FRAGMENT_PROCESSING |
5864                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5865                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5866                EFFECTIVE_L2_QUEUE_SIZE(7) |
5867                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5868         WREG32(VM_L2_CNTL2, 0);
5869         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5870                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5871         radeon_gart_table_vram_unpin(rdev);
5872 }
5873
5874 /**
5875  * cik_pcie_gart_fini - vm fini callback
5876  *
5877  * @rdev: radeon_device pointer
5878  *
5879  * Tears down the driver GART/VM setup (CIK).
5880  */
5881 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5882 {
5883         cik_pcie_gart_disable(rdev);
5884         radeon_gart_table_vram_free(rdev);
5885         radeon_gart_fini(rdev);
5886 }
5887
5888 /* vm parser */
5889 /**
5890  * cik_ib_parse - vm ib_parse callback
5891  *
5892  * @rdev: radeon_device pointer
5893  * @ib: indirect buffer pointer
5894  *
5895  * CIK uses hw IB checking so this is a nop (CIK).
5896  */
5897 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5898 {
5899         return 0;
5900 }
5901
5902 /*
5903  * vm
5904  * VMID 0 is the physical GPU address space as used by the kernel.
5905  * VMIDs 1-15 are used for userspace clients and are handled
5906  * by the radeon vm/hsa code.
5907  */
5908 /**
5909  * cik_vm_init - cik vm init callback
5910  *
5911  * @rdev: radeon_device pointer
5912  *
5913  * Inits cik specific vm parameters (number of VMs, base of vram for
5914  * VMIDs 1-15) (CIK).
5915  * Returns 0 for success.
5916  */
5917 int cik_vm_init(struct radeon_device *rdev)
5918 {
5919         /*
5920          * number of VMs
5921          * VMID 0 is reserved for System
5922          * radeon graphics/compute will use VMIDs 1-7
5923          * amdkfd will use VMIDs 8-15
5924          */
5925         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5926         /* base offset of vram pages */
5927         if (rdev->flags & RADEON_IS_IGP) {
5928                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
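                /* MC_VM_FB_OFFSET counts 4 MB units; shift by 22 to get a byte offset */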
5929                 tmp <<= 22;
5930                 rdev->vm_manager.vram_base_offset = tmp;
5931         } else
5932                 rdev->vm_manager.vram_base_offset = 0;
5933
5934         return 0;
5935 }
5936
5937 /**
5938  * cik_vm_fini - cik vm fini callback
5939  *
5940  * @rdev: radeon_device pointer
5941  *
5942  * Tear down any asic specific VM setup (CIK).
5943  */
5944 void cik_vm_fini(struct radeon_device *rdev)
5945 {
5946 }
5947
5948 /**
5949  * cik_vm_decode_fault - print human readable fault info
5950  *
5951  * @rdev: radeon_device pointer
5952  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5953  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5954  *
5955  * Print human readable fault information (CIK).
5956  */
5957 static void cik_vm_decode_fault(struct radeon_device *rdev,
5958                                 u32 status, u32 addr, u32 mc_client)
5959 {
5960         u32 mc_id;
5961         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5962         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5963         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5964                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5965
5966         if (rdev->family == CHIP_HAWAII)
5967                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5968         else
5969                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5970
5971         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5972                protections, vmid, addr,
5973                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5974                block, mc_client, mc_id);
5975 }
5976
5977 /**
5978  * cik_vm_flush - cik vm flush using the CP
5979  *
5980  * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: radeon_vm pointer for the VM being flushed
5981  *
5982  * Update the page table base and flush the VM TLB
5983  * using the CP (CIK).
5984  */
5985 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5986 {
5987         struct radeon_ring *ring = &rdev->ring[ridx];
5988         int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5989
5990         if (vm == NULL)
5991                 return;
5992
5993         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5994         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5995                                  WRITE_DATA_DST_SEL(0)));
5996         if (vm->id < 8) {
5997                 radeon_ring_write(ring,
5998                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5999         } else {
6000                 radeon_ring_write(ring,
6001                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
6002         }
6003         radeon_ring_write(ring, 0);
6004         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
6005
6006         /* update SH_MEM_* regs */
6007         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6008         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6009                                  WRITE_DATA_DST_SEL(0)));
6010         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6011         radeon_ring_write(ring, 0);
6012         radeon_ring_write(ring, VMID(vm->id));
6013
6014         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6015         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6016                                  WRITE_DATA_DST_SEL(0)));
6017         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6018         radeon_ring_write(ring, 0);
6019
6020         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6021         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6022         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6023         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6024
6025         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6026         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6027                                  WRITE_DATA_DST_SEL(0)));
6028         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6029         radeon_ring_write(ring, 0);
6030         radeon_ring_write(ring, VMID(0));
6031
6032         /* HDP flush */
6033         cik_hdp_flush_cp_ring_emit(rdev, ridx);
6034
6035         /* bits 0-15 are the VM contexts0-15 */
6036         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6037         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6038                                  WRITE_DATA_DST_SEL(0)));
6039         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6040         radeon_ring_write(ring, 0);
6041         radeon_ring_write(ring, 1 << vm->id);
6042
6043         /* compute doesn't have PFP */
6044         if (usepfp) {
6045                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6046                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6047                 radeon_ring_write(ring, 0x0);
6048         }
6049 }
6050
6051 /*
6052  * RLC
6053  * The RLC is a multi-purpose microengine that handles a
6054  * variety of functions, the most important of which is
6055  * acting as the interrupt controller.
6056  */
6057 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6058                                           bool enable)
6059 {
6060         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6061
6062         if (enable)
6063                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6064         else
6065                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6066         WREG32(CP_INT_CNTL_RING0, tmp);
6067 }
6068
6069 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6070 {
6071         u32 tmp;
6072
6073         tmp = RREG32(RLC_LB_CNTL);
6074         if (enable)
6075                 tmp |= LOAD_BALANCE_ENABLE;
6076         else
6077                 tmp &= ~LOAD_BALANCE_ENABLE;
6078         WREG32(RLC_LB_CNTL, tmp);
6079 }
6080
6081 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6082 {
6083         u32 i, j, k;
6084         u32 mask;
6085
6086         mutex_lock(&rdev->grbm_idx_mutex);
6087         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6088                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6089                         cik_select_se_sh(rdev, i, j);
6090                         for (k = 0; k < rdev->usec_timeout; k++) {
6091                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6092                                         break;
6093                                 udelay(1);
6094                         }
6095                 }
6096         }
6097         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
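        /* passing 0xffffffff for se_num/sh_num broadcasts the selection to all SEs/SHs */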
6098         mutex_unlock(&rdev->grbm_idx_mutex);
6099
6100         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6101         for (k = 0; k < rdev->usec_timeout; k++) {
6102                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6103                         break;
6104                 udelay(1);
6105         }
6106 }
6107
6108 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6109 {
6110         u32 tmp;
6111
6112         tmp = RREG32(RLC_CNTL);
6113         if (tmp != rlc)
6114                 WREG32(RLC_CNTL, rlc);
6115 }
6116
6117 static u32 cik_halt_rlc(struct radeon_device *rdev)
6118 {
6119         u32 data, orig;
6120
6121         orig = data = RREG32(RLC_CNTL);
6122
6123         if (data & RLC_ENABLE) {
6124                 u32 i;
6125
6126                 data &= ~RLC_ENABLE;
6127                 WREG32(RLC_CNTL, data);
6128
6129                 for (i = 0; i < rdev->usec_timeout; i++) {
6130                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6131                                 break;
6132                         udelay(1);
6133                 }
6134
6135                 cik_wait_for_rlc_serdes(rdev);
6136         }
6137
6138         return orig;
6139 }
6140
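/*
 * Safe mode is a request/acknowledge handshake with the RLC: the
 * driver posts REQ plus a MESSAGE() code in RLC_GPR_REG2, waits for
 * RLC_GPM_STAT to report both GFX_POWER_STATUS and GFX_CLOCK_STATUS,
 * and then waits for the RLC to clear the REQ bit.  The exit path
 * only posts the request and does not poll for completion.
 */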
6141 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6142 {
6143         u32 tmp, i, mask;
6144
6145         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6146         WREG32(RLC_GPR_REG2, tmp);
6147
6148         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6149         for (i = 0; i < rdev->usec_timeout; i++) {
6150                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6151                         break;
6152                 udelay(1);
6153         }
6154
6155         for (i = 0; i < rdev->usec_timeout; i++) {
6156                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6157                         break;
6158                 udelay(1);
6159         }
6160 }
6161
6162 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6163 {
6164         u32 tmp;
6165
6166         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6167         WREG32(RLC_GPR_REG2, tmp);
6168 }
6169
6170 /**
6171  * cik_rlc_stop - stop the RLC ME
6172  *
6173  * @rdev: radeon_device pointer
6174  *
6175  * Halt the RLC ME (MicroEngine) (CIK).
6176  */
6177 static void cik_rlc_stop(struct radeon_device *rdev)
6178 {
6179         WREG32(RLC_CNTL, 0);
6180
6181         cik_enable_gui_idle_interrupt(rdev, false);
6182
6183         cik_wait_for_rlc_serdes(rdev);
6184 }
6185
6186 /**
6187  * cik_rlc_start - start the RLC ME
6188  *
6189  * @rdev: radeon_device pointer
6190  *
6191  * Unhalt the RLC ME (MicroEngine) (CIK).
6192  */
6193 static void cik_rlc_start(struct radeon_device *rdev)
6194 {
6195         WREG32(RLC_CNTL, RLC_ENABLE);
6196
6197         cik_enable_gui_idle_interrupt(rdev, true);
6198
6199         udelay(50);
6200 }
6201
6202 /**
6203  * cik_rlc_resume - setup the RLC hw
6204  *
6205  * @rdev: radeon_device pointer
6206  *
6207  * Initialize the RLC registers, load the ucode,
6208  * and start the RLC (CIK).
6209  * Returns 0 for success, -EINVAL if the ucode is not available.
6210  */
6211 static int cik_rlc_resume(struct radeon_device *rdev)
6212 {
6213         u32 i, size, tmp;
6214
6215         if (!rdev->rlc_fw)
6216                 return -EINVAL;
6217
6218         cik_rlc_stop(rdev);
6219
6220         /* disable CG */
6221         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6222         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6223
6224         si_rlc_reset(rdev);
6225
6226         cik_init_pg(rdev);
6227
6228         cik_init_cg(rdev);
6229
6230         WREG32(RLC_LB_CNTR_INIT, 0);
6231         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6232
6233         mutex_lock(&rdev->grbm_idx_mutex);
6234         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6235         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6236         WREG32(RLC_LB_PARAMS, 0x00600408);
6237         WREG32(RLC_LB_CNTL, 0x80000004);
6238         mutex_unlock(&rdev->grbm_idx_mutex);
6239
6240         WREG32(RLC_MC_CNTL, 0);
6241         WREG32(RLC_UCODE_CNTL, 0);
6242
6243         if (rdev->new_fw) {
6244                 const struct rlc_firmware_header_v1_0 *hdr =
6245                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6246                 const __le32 *fw_data = (const __le32 *)
6247                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6248
6249                 radeon_ucode_print_rlc_hdr(&hdr->header);
6250
6251                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6252                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6253                 for (i = 0; i < size; i++)
6254                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6255                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6256         } else {
6257                 const __be32 *fw_data;
6258
6259                 switch (rdev->family) {
6260                 case CHIP_BONAIRE:
6261                 case CHIP_HAWAII:
6262                 default:
6263                         size = BONAIRE_RLC_UCODE_SIZE;
6264                         break;
6265                 case CHIP_KAVERI:
6266                         size = KV_RLC_UCODE_SIZE;
6267                         break;
6268                 case CHIP_KABINI:
6269                         size = KB_RLC_UCODE_SIZE;
6270                         break;
6271                 case CHIP_MULLINS:
6272                         size = ML_RLC_UCODE_SIZE;
6273                         break;
6274                 }
6275
6276                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6277                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6278                 for (i = 0; i < size; i++)
6279                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6280                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6281         }
6282
6283         /* XXX - find out what chips support lbpw */
6284         cik_enable_lbpw(rdev, false);
6285
6286         if (rdev->family == CHIP_BONAIRE)
6287                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6288
6289         cik_rlc_start(rdev);
6290
6291         return 0;
6292 }
6293
6294 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6295 {
6296         u32 data, orig, tmp, tmp2;
6297
6298         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6299
6300         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6301                 cik_enable_gui_idle_interrupt(rdev, true);
6302
6303                 tmp = cik_halt_rlc(rdev);
6304
6305                 mutex_lock(&rdev->grbm_idx_mutex);
6306                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6307                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6308                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6309                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6310                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6311                 mutex_unlock(&rdev->grbm_idx_mutex);
6312
6313                 cik_update_rlc(rdev, tmp);
6314
6315                 data |= CGCG_EN | CGLS_EN;
6316         } else {
6317                 cik_enable_gui_idle_interrupt(rdev, false);
6318
6319                 RREG32(CB_CGTT_SCLK_CTRL);
6320                 RREG32(CB_CGTT_SCLK_CTRL);
6321                 RREG32(CB_CGTT_SCLK_CTRL);
6322                 RREG32(CB_CGTT_SCLK_CTRL);
6323
6324                 data &= ~(CGCG_EN | CGLS_EN);
6325         }
6326
6327         if (orig != data)
6328                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6329
6330 }
6331
6332 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6333 {
6334         u32 data, orig, tmp = 0;
6335
6336         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6337                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6338                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6339                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6340                                 data |= CP_MEM_LS_EN;
6341                                 if (orig != data)
6342                                         WREG32(CP_MEM_SLP_CNTL, data);
6343                         }
6344                 }
6345
6346                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6347                 data |= 0x00000001;
6348                 data &= 0xfffffffd;
6349                 if (orig != data)
6350                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6351
6352                 tmp = cik_halt_rlc(rdev);
6353
6354                 mutex_lock(&rdev->grbm_idx_mutex);
6355                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6356                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6357                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6358                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6359                 WREG32(RLC_SERDES_WR_CTRL, data);
6360                 mutex_unlock(&rdev->grbm_idx_mutex);
6361
6362                 cik_update_rlc(rdev, tmp);
6363
6364                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6365                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6366                         data &= ~SM_MODE_MASK;
6367                         data |= SM_MODE(0x2);
6368                         data |= SM_MODE_ENABLE;
6369                         data &= ~CGTS_OVERRIDE;
6370                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6371                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6372                                 data &= ~CGTS_LS_OVERRIDE;
6373                         data &= ~ON_MONITOR_ADD_MASK;
6374                         data |= ON_MONITOR_ADD_EN;
6375                         data |= ON_MONITOR_ADD(0x96);
6376                         if (orig != data)
6377                                 WREG32(CGTS_SM_CTRL_REG, data);
6378                 }
6379         } else {
6380                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6381                 data |= 0x00000003;
6382                 if (orig != data)
6383                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6384
6385                 data = RREG32(RLC_MEM_SLP_CNTL);
6386                 if (data & RLC_MEM_LS_EN) {
6387                         data &= ~RLC_MEM_LS_EN;
6388                         WREG32(RLC_MEM_SLP_CNTL, data);
6389                 }
6390
6391                 data = RREG32(CP_MEM_SLP_CNTL);
6392                 if (data & CP_MEM_LS_EN) {
6393                         data &= ~CP_MEM_LS_EN;
6394                         WREG32(CP_MEM_SLP_CNTL, data);
6395                 }
6396
6397                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6398                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6399                 if (orig != data)
6400                         WREG32(CGTS_SM_CTRL_REG, data);
6401
6402                 tmp = cik_halt_rlc(rdev);
6403
6404                 mutex_lock(&rdev->grbm_idx_mutex);
6405                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6406                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6407                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6408                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6409                 WREG32(RLC_SERDES_WR_CTRL, data);
6410                 mutex_unlock(&rdev->grbm_idx_mutex);
6411
6412                 cik_update_rlc(rdev, tmp);
6413         }
6414 }
6415
6416 static const u32 mc_cg_registers[] =
6417 {
6418         MC_HUB_MISC_HUB_CG,
6419         MC_HUB_MISC_SIP_CG,
6420         MC_HUB_MISC_VM_CG,
6421         MC_XPB_CLK_GAT,
6422         ATC_MISC_CG,
6423         MC_CITF_MISC_WR_CG,
6424         MC_CITF_MISC_RD_CG,
6425         MC_CITF_MISC_VM_CG,
6426         VM_L2_CG,
6427 };
6428
6429 static void cik_enable_mc_ls(struct radeon_device *rdev,
6430                              bool enable)
6431 {
6432         int i;
6433         u32 orig, data;
6434
6435         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6436                 orig = data = RREG32(mc_cg_registers[i]);
6437                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6438                         data |= MC_LS_ENABLE;
6439                 else
6440                         data &= ~MC_LS_ENABLE;
6441                 if (data != orig)
6442                         WREG32(mc_cg_registers[i], data);
6443         }
6444 }
6445
6446 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6447                                bool enable)
6448 {
6449         int i;
6450         u32 orig, data;
6451
6452         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6453                 orig = data = RREG32(mc_cg_registers[i]);
6454                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6455                         data |= MC_CG_ENABLE;
6456                 else
6457                         data &= ~MC_CG_ENABLE;
6458                 if (data != orig)
6459                         WREG32(mc_cg_registers[i], data);
6460         }
6461 }
6462
6463 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6464                                  bool enable)
6465 {
6466         u32 orig, data;
6467
6468         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6469                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6470                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6471         } else {
6472                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6473                 data |= 0xff000000;
6474                 if (data != orig)
6475                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6476
6477                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6478                 data |= 0xff000000;
6479                 if (data != orig)
6480                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6481         }
6482 }
6483
6484 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6485                                  bool enable)
6486 {
6487         u32 orig, data;
6488
6489         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6490                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6491                 data |= 0x100;
6492                 if (orig != data)
6493                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6494
6495                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6496                 data |= 0x100;
6497                 if (orig != data)
6498                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6499         } else {
6500                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6501                 data &= ~0x100;
6502                 if (orig != data)
6503                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6504
6505                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6506                 data &= ~0x100;
6507                 if (orig != data)
6508                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6509         }
6510 }
6511
6512 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6513                                 bool enable)
6514 {
6515         u32 orig, data;
6516
6517         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6518                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6519                 data = 0xfff;
6520                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6521
6522                 orig = data = RREG32(UVD_CGC_CTRL);
6523                 data |= DCM;
6524                 if (orig != data)
6525                         WREG32(UVD_CGC_CTRL, data);
6526         } else {
6527                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6528                 data &= ~0xfff;
6529                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6530
6531                 orig = data = RREG32(UVD_CGC_CTRL);
6532                 data &= ~DCM;
6533                 if (orig != data)
6534                         WREG32(UVD_CGC_CTRL, data);
6535         }
6536 }
6537
6538 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6539                                bool enable)
6540 {
6541         u32 orig, data;
6542
6543         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6544
6545         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6546                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6547                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6548         else
6549                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6550                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6551
6552         if (orig != data)
6553                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6554 }
6555
6556 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6557                                 bool enable)
6558 {
6559         u32 orig, data;
6560
6561         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6562
6563         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6564                 data &= ~CLOCK_GATING_DIS;
6565         else
6566                 data |= CLOCK_GATING_DIS;
6567
6568         if (orig != data)
6569                 WREG32(HDP_HOST_PATH_CNTL, data);
6570 }
6571
6572 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6573                               bool enable)
6574 {
6575         u32 orig, data;
6576
6577         orig = data = RREG32(HDP_MEM_POWER_LS);
6578
6579         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6580                 data |= HDP_LS_ENABLE;
6581         else
6582                 data &= ~HDP_LS_ENABLE;
6583
6584         if (orig != data)
6585                 WREG32(HDP_MEM_POWER_LS, data);
6586 }
6587
6588 void cik_update_cg(struct radeon_device *rdev,
6589                    u32 block, bool enable)
6590 {
6591
6592         if (block & RADEON_CG_BLOCK_GFX) {
6593                 cik_enable_gui_idle_interrupt(rdev, false);
6594                 /* order matters! */
6595                 if (enable) {
6596                         cik_enable_mgcg(rdev, true);
6597                         cik_enable_cgcg(rdev, true);
6598                 } else {
6599                         cik_enable_cgcg(rdev, false);
6600                         cik_enable_mgcg(rdev, false);
6601                 }
6602                 cik_enable_gui_idle_interrupt(rdev, true);
6603         }
6604
6605         if (block & RADEON_CG_BLOCK_MC) {
6606                 if (!(rdev->flags & RADEON_IS_IGP)) {
6607                         cik_enable_mc_mgcg(rdev, enable);
6608                         cik_enable_mc_ls(rdev, enable);
6609                 }
6610         }
6611
6612         if (block & RADEON_CG_BLOCK_SDMA) {
6613                 cik_enable_sdma_mgcg(rdev, enable);
6614                 cik_enable_sdma_mgls(rdev, enable);
6615         }
6616
6617         if (block & RADEON_CG_BLOCK_BIF) {
6618                 cik_enable_bif_mgls(rdev, enable);
6619         }
6620
6621         if (block & RADEON_CG_BLOCK_UVD) {
6622                 if (rdev->has_uvd)
6623                         cik_enable_uvd_mgcg(rdev, enable);
6624         }
6625
6626         if (block & RADEON_CG_BLOCK_HDP) {
6627                 cik_enable_hdp_mgcg(rdev, enable);
6628                 cik_enable_hdp_ls(rdev, enable);
6629         }
6630
6631         if (block & RADEON_CG_BLOCK_VCE) {
6632                 vce_v2_0_enable_mgcg(rdev, enable);
6633         }
6634 }
6635
6636 static void cik_init_cg(struct radeon_device *rdev)
6637 {
6638
6639         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6640
6641         if (rdev->has_uvd)
6642                 si_init_uvd_internal_cg(rdev);
6643
6644         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6645                              RADEON_CG_BLOCK_SDMA |
6646                              RADEON_CG_BLOCK_BIF |
6647                              RADEON_CG_BLOCK_UVD |
6648                              RADEON_CG_BLOCK_HDP), true);
6649 }
6650
6651 static void cik_fini_cg(struct radeon_device *rdev)
6652 {
6653         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6654                              RADEON_CG_BLOCK_SDMA |
6655                              RADEON_CG_BLOCK_BIF |
6656                              RADEON_CG_BLOCK_UVD |
6657                              RADEON_CG_BLOCK_HDP), false);
6658
6659         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6660 }
6661
6662 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6663                                           bool enable)
6664 {
6665         u32 data, orig;
6666
6667         orig = data = RREG32(RLC_PG_CNTL);
6668         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6669                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6670         else
6671                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6672         if (orig != data)
6673                 WREG32(RLC_PG_CNTL, data);
6674 }
6675
6676 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6677                                           bool enable)
6678 {
6679         u32 data, orig;
6680
6681         orig = data = RREG32(RLC_PG_CNTL);
6682         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6683                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6684         else
6685                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6686         if (orig != data)
6687                 WREG32(RLC_PG_CNTL, data);
6688 }
6689
6690 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6691 {
6692         u32 data, orig;
6693
6694         orig = data = RREG32(RLC_PG_CNTL);
6695         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6696                 data &= ~DISABLE_CP_PG;
6697         else
6698                 data |= DISABLE_CP_PG;
6699         if (orig != data)
6700                 WREG32(RLC_PG_CNTL, data);
6701 }
6702
6703 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6704 {
6705         u32 data, orig;
6706
6707         orig = data = RREG32(RLC_PG_CNTL);
6708         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6709                 data &= ~DISABLE_GDS_PG;
6710         else
6711                 data |= DISABLE_GDS_PG;
6712         if (orig != data)
6713                 WREG32(RLC_PG_CNTL, data);
6714 }
6715
6716 #define CP_ME_TABLE_SIZE    96
6717 #define CP_ME_TABLE_OFFSET  2048
6718 #define CP_MEC_TABLE_OFFSET 4096
6719
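/*
 * Copy the jump table out of each CP firmware image into the RLC's
 * cp_table buffer (pointed at by RLC_CP_TABLE_RESTORE in
 * cik_init_gfx_cgpg()) so CP state can be restored after power gating.
 * The me index below selects the firmware: 0 = CE, 1 = PFP, 2 = ME,
 * 3 = MEC, 4 = MEC2 (KAVERI only).
 */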
6720 void cik_init_cp_pg_table(struct radeon_device *rdev)
6721 {
6722         volatile u32 *dst_ptr;
6723         int me, i, max_me = 4;
6724         u32 bo_offset = 0;
6725         u32 table_offset, table_size;
6726
6727         if (rdev->family == CHIP_KAVERI)
6728                 max_me = 5;
6729
6730         if (rdev->rlc.cp_table_ptr == NULL)
6731                 return;
6732
6733         /* write the cp table buffer */
6734         dst_ptr = rdev->rlc.cp_table_ptr;
6735         for (me = 0; me < max_me; me++) {
6736                 if (rdev->new_fw) {
6737                         const __le32 *fw_data;
6738                         const struct gfx_firmware_header_v1_0 *hdr;
6739
6740                         if (me == 0) {
6741                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6742                                 fw_data = (const __le32 *)
6743                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6744                                 table_offset = le32_to_cpu(hdr->jt_offset);
6745                                 table_size = le32_to_cpu(hdr->jt_size);
6746                         } else if (me == 1) {
6747                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6748                                 fw_data = (const __le32 *)
6749                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6750                                 table_offset = le32_to_cpu(hdr->jt_offset);
6751                                 table_size = le32_to_cpu(hdr->jt_size);
6752                         } else if (me == 2) {
6753                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6754                                 fw_data = (const __le32 *)
6755                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6756                                 table_offset = le32_to_cpu(hdr->jt_offset);
6757                                 table_size = le32_to_cpu(hdr->jt_size);
6758                         } else if (me == 3) {
6759                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6760                                 fw_data = (const __le32 *)
6761                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6762                                 table_offset = le32_to_cpu(hdr->jt_offset);
6763                                 table_size = le32_to_cpu(hdr->jt_size);
6764                         } else {
6765                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6766                                 fw_data = (const __le32 *)
6767                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6768                                 table_offset = le32_to_cpu(hdr->jt_offset);
6769                                 table_size = le32_to_cpu(hdr->jt_size);
6770                         }
6771
6772                         for (i = 0; i < table_size; i++) {
6773                                 dst_ptr[bo_offset + i] =
6774                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6775                         }
6776                         bo_offset += table_size;
6777                 } else {
6778                         const __be32 *fw_data;
6779                         table_size = CP_ME_TABLE_SIZE;
6780
6781                         if (me == 0) {
6782                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6783                                 table_offset = CP_ME_TABLE_OFFSET;
6784                         } else if (me == 1) {
6785                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6786                                 table_offset = CP_ME_TABLE_OFFSET;
6787                         } else if (me == 2) {
6788                                 fw_data = (const __be32 *)rdev->me_fw->data;
6789                                 table_offset = CP_ME_TABLE_OFFSET;
6790                         } else {
6791                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6792                                 table_offset = CP_MEC_TABLE_OFFSET;
6793                         }
6794
6795                         for (i = 0; i < table_size; i++) {
6796                                 dst_ptr[bo_offset + i] =
6797                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6798                         }
6799                         bo_offset += table_size;
6800                 }
6801         }
6802 }
6803
6804 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6805                                 bool enable)
6806 {
6807         u32 data, orig;
6808
6809         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6810                 orig = data = RREG32(RLC_PG_CNTL);
6811                 data |= GFX_PG_ENABLE;
6812                 if (orig != data)
6813                         WREG32(RLC_PG_CNTL, data);
6814
6815                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6816                 data |= AUTO_PG_EN;
6817                 if (orig != data)
6818                         WREG32(RLC_AUTO_PG_CTRL, data);
6819         } else {
6820                 orig = data = RREG32(RLC_PG_CNTL);
6821                 data &= ~GFX_PG_ENABLE;
6822                 if (orig != data)
6823                         WREG32(RLC_PG_CNTL, data);
6824
6825                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6826                 data &= ~AUTO_PG_EN;
6827                 if (orig != data)
6828                         WREG32(RLC_AUTO_PG_CTRL, data);
6829
6830                 data = RREG32(DB_RENDER_CONTROL);
6831         }
6832 }
6833
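/*
 * The SHADER_ARRAY_CONFIG registers flag disabled CUs in their upper
 * 16 bits (fused-off in CC_*, user-disabled in GC_USER_*).  Worked
 * example, assuming max_cu_per_sh = 8, CC = 0x00c00000, GC_USER = 0:
 * tmp becomes 0x00c0 after the shift, mask = 0xff, and the function
 * returns (~0x00c0) & 0xff = 0x3f, i.e. CUs 0-5 active, CUs 6-7 off.
 */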
6834 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6835 {
6836         u32 mask = 0, tmp, tmp1;
6837         int i;
6838
6839         mutex_lock(&rdev->grbm_idx_mutex);
6840         cik_select_se_sh(rdev, se, sh);
6841         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6842         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6843         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6844         mutex_unlock(&rdev->grbm_idx_mutex);
6845
6846         tmp &= 0xffff0000;
6847
6848         tmp |= tmp1;
6849         tmp >>= 16;
6850
6851         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6852                 mask <<= 1;
6853                 mask |= 1;
6854         }
6855
6856         return (~tmp) & mask;
6857 }
6858
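/*
 * Pick at most two active CUs per shader array to stay "always on",
 * packed into RLC_PG_AO_CU_MASK as one byte per shader array
 * (SE0/SH0 -> bits 0-7, SE0/SH1 -> bits 8-15, SE1/SH0 -> bits 16-23,
 * and so on), and program the total active CU count into RLC_MAX_PG_CU.
 */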
6859 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6860 {
6861         u32 i, j, k, active_cu_number = 0;
6862         u32 mask, counter, cu_bitmap;
6863         u32 tmp = 0;
6864
6865         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6866                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6867                         mask = 1;
6868                         cu_bitmap = 0;
6869                         counter = 0;
6870                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6871                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6872                                         if (counter < 2)
6873                                                 cu_bitmap |= mask;
6874                                         counter++;
6875                                 }
6876                                 mask <<= 1;
6877                         }
6878
6879                         active_cu_number += counter;
6880                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6881                 }
6882         }
6883
6884         WREG32(RLC_PG_AO_CU_MASK, tmp);
6885
6886         tmp = RREG32(RLC_MAX_PG_CU);
6887         tmp &= ~MAX_PU_CU_MASK;
6888         tmp |= MAX_PU_CU(active_cu_number);
6889         WREG32(RLC_MAX_PG_CU, tmp);
6890 }
6891
6892 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6893                                        bool enable)
6894 {
6895         u32 data, orig;
6896
6897         orig = data = RREG32(RLC_PG_CNTL);
6898         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6899                 data |= STATIC_PER_CU_PG_ENABLE;
6900         else
6901                 data &= ~STATIC_PER_CU_PG_ENABLE;
6902         if (orig != data)
6903                 WREG32(RLC_PG_CNTL, data);
6904 }
6905
6906 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6907                                         bool enable)
6908 {
6909         u32 data, orig;
6910
6911         orig = data = RREG32(RLC_PG_CNTL);
6912         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6913                 data |= DYN_PER_CU_PG_ENABLE;
6914         else
6915                 data &= ~DYN_PER_CU_PG_ENABLE;
6916         if (orig != data)
6917                 WREG32(RLC_PG_CNTL, data);
6918 }
6919
6920 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6921 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6922
6923 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6924 {
6925         u32 data, orig;
6926         u32 i;
6927
6928         if (rdev->rlc.cs_data) {
6929                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6930                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6931                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6932                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6933         } else {
6934                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6935                 for (i = 0; i < 3; i++)
6936                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6937         }
6938         if (rdev->rlc.reg_list) {
6939                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6940                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6941                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6942         }
6943
6944         orig = data = RREG32(RLC_PG_CNTL);
6945         data |= GFX_PG_SRC;
6946         if (orig != data)
6947                 WREG32(RLC_PG_CNTL, data);
6948
6949         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6950         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6951
6952         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6953         data &= ~IDLE_POLL_COUNT_MASK;
6954         data |= IDLE_POLL_COUNT(0x60);
6955         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6956
6957         data = 0x10101010;
6958         WREG32(RLC_PG_DELAY, data);
6959
6960         data = RREG32(RLC_PG_DELAY_2);
6961         data &= ~0xff;
6962         data |= 0x3;
6963         WREG32(RLC_PG_DELAY_2, data);
6964
6965         data = RREG32(RLC_AUTO_PG_CTRL);
6966         data &= ~GRBM_REG_SGIT_MASK;
6967         data |= GRBM_REG_SGIT(0x700);
6968         WREG32(RLC_AUTO_PG_CTRL, data);
6969
6970 }
6971
6972 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6973 {
6974         cik_enable_gfx_cgpg(rdev, enable);
6975         cik_enable_gfx_static_mgpg(rdev, enable);
6976         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6977 }
6978
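/*
 * Clear state buffer (CSB) layout, as written by cik_get_csb_buffer():
 * PREAMBLE begin (2 dwords), CONTEXT_CONTROL (3), one SET_CONTEXT_REG
 * run per SECT_CONTEXT extent (2 + reg_count each), the
 * pa_sc_raster_config pair (4), PREAMBLE end (2) and CLEAR_STATE (2).
 * cik_get_csb_size() must count exactly the same dwords so the
 * allocated buffer matches what gets written.
 */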
6979 u32 cik_get_csb_size(struct radeon_device *rdev)
6980 {
6981         u32 count = 0;
6982         const struct cs_section_def *sect = NULL;
6983         const struct cs_extent_def *ext = NULL;
6984
6985         if (rdev->rlc.cs_data == NULL)
6986                 return 0;
6987
6988         /* begin clear state */
6989         count += 2;
6990         /* context control state */
6991         count += 3;
6992
6993         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6994                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6995                         if (sect->id == SECT_CONTEXT)
6996                                 count += 2 + ext->reg_count;
6997                         else
6998                                 return 0;
6999                 }
7000         }
7001         /* pa_sc_raster_config/pa_sc_raster_config1 */
7002         count += 4;
7003         /* end clear state */
7004         count += 2;
7005         /* clear state */
7006         count += 2;
7007
7008         return count;
7009 }
7010
7011 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7012 {
7013         u32 count = 0, i;
7014         const struct cs_section_def *sect = NULL;
7015         const struct cs_extent_def *ext = NULL;
7016
7017         if (rdev->rlc.cs_data == NULL)
7018                 return;
7019         if (buffer == NULL)
7020                 return;
7021
7022         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7023         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7024
7025         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7026         buffer[count++] = cpu_to_le32(0x80000000);
7027         buffer[count++] = cpu_to_le32(0x80000000);
7028
7029         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7030                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7031                         if (sect->id == SECT_CONTEXT) {
7032                                 buffer[count++] =
7033                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7034                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7035                                 for (i = 0; i < ext->reg_count; i++)
7036                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7037                         } else {
7038                                 return;
7039                         }
7040                 }
7041         }
7042
7043         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7044         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7045         switch (rdev->family) {
7046         case CHIP_BONAIRE:
7047                 buffer[count++] = cpu_to_le32(0x16000012);
7048                 buffer[count++] = cpu_to_le32(0x00000000);
7049                 break;
7050         case CHIP_KAVERI:
7051                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7052                 buffer[count++] = cpu_to_le32(0x00000000);
7053                 break;
7054         case CHIP_KABINI:
7055         case CHIP_MULLINS:
7056                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7057                 buffer[count++] = cpu_to_le32(0x00000000);
7058                 break;
7059         case CHIP_HAWAII:
7060                 buffer[count++] = cpu_to_le32(0x3a00161a);
7061                 buffer[count++] = cpu_to_le32(0x0000002e);
7062                 break;
7063         default:
7064                 buffer[count++] = cpu_to_le32(0x00000000);
7065                 buffer[count++] = cpu_to_le32(0x00000000);
7066                 break;
7067         }
7068
7069         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7070         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7071
7072         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7073         buffer[count++] = cpu_to_le32(0);
7074 }
7075
7076 static void cik_init_pg(struct radeon_device *rdev)
7077 {
7078         if (rdev->pg_flags) {
7079                 cik_enable_sck_slowdown_on_pu(rdev, true);
7080                 cik_enable_sck_slowdown_on_pd(rdev, true);
7081                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7082                         cik_init_gfx_cgpg(rdev);
7083                         cik_enable_cp_pg(rdev, true);
7084                         cik_enable_gds_pg(rdev, true);
7085                 }
7086                 cik_init_ao_cu_mask(rdev);
7087                 cik_update_gfx_pg(rdev, true);
7088         }
7089 }
7090
7091 static void cik_fini_pg(struct radeon_device *rdev)
7092 {
7093         if (rdev->pg_flags) {
7094                 cik_update_gfx_pg(rdev, false);
7095                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7096                         cik_enable_cp_pg(rdev, false);
7097                         cik_enable_gds_pg(rdev, false);
7098                 }
7099         }
7100 }
7101
7102 /*
7103  * Interrupts
7104  * Starting with r6xx, interrupts are handled via a ring buffer.
7105  * Ring buffers are areas of GPU accessible memory that the GPU
7106  * writes interrupt vectors into and the host reads vectors out of.
7107  * There is a rptr (read pointer) that determines where the
7108  * host is currently reading, and a wptr (write pointer)
7109  * which determines where the GPU has written.  When the
7110  * pointers are equal, the ring is idle.  When the GPU
7111  * writes vectors to the ring buffer, it increments the
7112  * wptr.  When there is an interrupt, the host then starts
7113  * fetching vectors and processing them until the pointers are
7114  * equal again at which point it updates the rptr.
7115  */
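/*
 * Rough sketch of the consumer side described above (illustrative
 * only: the real CIK handler is cik_irq_process() further down in
 * this file, and read_wptr()/handle_vector() are stand-ins, not
 * functions that exist here):
 *
 *      wptr = read_wptr(rdev);
 *      rptr = rdev->ih.rptr;
 *      while (rptr != wptr) {
 *              handle_vector(&rdev->ih.ring[rptr / 4]);
 *              rptr = (rptr + 16) & rdev->ih.ptr_mask;   (16-byte entries)
 *      }
 *      rdev->ih.rptr = rptr;
 *      WREG32(IH_RB_RPTR, rptr);   (tell the hw how far the host has read)
 */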
7116
7117 /**
7118  * cik_enable_interrupts - Enable the interrupt ring buffer
7119  *
7120  * @rdev: radeon_device pointer
7121  *
7122  * Enable the interrupt ring buffer (CIK).
7123  */
7124 static void cik_enable_interrupts(struct radeon_device *rdev)
7125 {
7126         u32 ih_cntl = RREG32(IH_CNTL);
7127         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7128
7129         ih_cntl |= ENABLE_INTR;
7130         ih_rb_cntl |= IH_RB_ENABLE;
7131         WREG32(IH_CNTL, ih_cntl);
7132         WREG32(IH_RB_CNTL, ih_rb_cntl);
7133         rdev->ih.enabled = true;
7134 }
7135
7136 /**
7137  * cik_disable_interrupts - Disable the interrupt ring buffer
7138  *
7139  * @rdev: radeon_device pointer
7140  *
7141  * Disable the interrupt ring buffer (CIK).
7142  */
7143 static void cik_disable_interrupts(struct radeon_device *rdev)
7144 {
7145         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7146         u32 ih_cntl = RREG32(IH_CNTL);
7147
7148         ih_rb_cntl &= ~IH_RB_ENABLE;
7149         ih_cntl &= ~ENABLE_INTR;
7150         WREG32(IH_RB_CNTL, ih_rb_cntl);
7151         WREG32(IH_CNTL, ih_cntl);
7152         /* set rptr, wptr to 0 */
7153         WREG32(IH_RB_RPTR, 0);
7154         WREG32(IH_RB_WPTR, 0);
7155         rdev->ih.enabled = false;
7156         rdev->ih.rptr = 0;
7157 }
7158
7159 /**
7160  * cik_disable_interrupt_state - Disable all interrupt sources
7161  *
7162  * @rdev: radeon_device pointer
7163  *
7164  * Clear all interrupt enable bits used by the driver (CIK).
7165  */
7166 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7167 {
7168         u32 tmp;
7169
7170         /* gfx ring */
7171         tmp = RREG32(CP_INT_CNTL_RING0) &
7172                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7173         WREG32(CP_INT_CNTL_RING0, tmp);
7174         /* sdma */
7175         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7176         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7177         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7178         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7179         /* compute queues */
7180         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7181         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7182         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7183         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7184         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7185         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7186         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7187         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7188         /* grbm */
7189         WREG32(GRBM_INT_CNTL, 0);
7190         /* vline/vblank, etc. */
7191         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7192         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7193         if (rdev->num_crtc >= 4) {
7194                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7195                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7196         }
7197         if (rdev->num_crtc >= 6) {
7198                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7199                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7200         }
7201         /* pflip */
7202         if (rdev->num_crtc >= 2) {
7203                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7204                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7205         }
7206         if (rdev->num_crtc >= 4) {
7207                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7208                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7209         }
7210         if (rdev->num_crtc >= 6) {
7211                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7212                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7213         }
7214
7215         /* dac hotplug */
7216         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7217
7218         /* digital hotplug */
7219         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7220         WREG32(DC_HPD1_INT_CONTROL, tmp);
7221         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7222         WREG32(DC_HPD2_INT_CONTROL, tmp);
7223         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7224         WREG32(DC_HPD3_INT_CONTROL, tmp);
7225         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7226         WREG32(DC_HPD4_INT_CONTROL, tmp);
7227         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7228         WREG32(DC_HPD5_INT_CONTROL, tmp);
7229         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7230         WREG32(DC_HPD6_INT_CONTROL, tmp);
7231
7232 }
7233
7234 /**
7235  * cik_irq_init - init and enable the interrupt ring
7236  *
7237  * @rdev: radeon_device pointer
7238  *
7239  * Allocate a ring buffer for the interrupt controller,
7240  * initialize the RLC, disable interrupts, then set up
7241  * and enable the IH ring buffer (CIK).
7242  * Called at device load and resume.
7243  * Returns 0 for success, errors for failure.
7244  */
7245 static int cik_irq_init(struct radeon_device *rdev)
7246 {
7247         int ret = 0;
7248         int rb_bufsz;
7249         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7250
7251         /* allocate ring */
7252         ret = r600_ih_ring_alloc(rdev);
7253         if (ret)
7254                 return ret;
7255
7256         /* disable irqs */
7257         cik_disable_interrupts(rdev);
7258
7259         /* init rlc */
7260         ret = cik_rlc_resume(rdev);
7261         if (ret) {
7262                 r600_ih_ring_fini(rdev);
7263                 return ret;
7264         }
7265
7266         /* setup interrupt control */
7267         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7268         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7269         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7270         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7271          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7272          */
7273         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7274         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7275         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7276         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7277
7278         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7279         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
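        /* rb_bufsz is the log2 of the ring size in dwords, e.g. a
         * 64 KiB IH ring gives order_base_2(65536 / 4) = 14.
         */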
7280
7281         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7282                       IH_WPTR_OVERFLOW_CLEAR |
7283                       (rb_bufsz << 1));
7284
7285         if (rdev->wb.enabled)
7286                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7287
7288         /* set the writeback address whether it's enabled or not */
7289         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7290         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7291
7292         WREG32(IH_RB_CNTL, ih_rb_cntl);
7293
7294         /* set rptr, wptr to 0 */
7295         WREG32(IH_RB_RPTR, 0);
7296         WREG32(IH_RB_WPTR, 0);
7297
7298         /* Default settings for IH_CNTL (disabled at first) */
7299         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7300         /* RPTR_REARM only works if msi's are enabled */
7301         if (rdev->msi_enabled)
7302                 ih_cntl |= RPTR_REARM;
7303         WREG32(IH_CNTL, ih_cntl);
7304
7305         /* force the active interrupt state to all disabled */
7306         cik_disable_interrupt_state(rdev);
7307
7308         pci_set_master(rdev->pdev);
7309
7310         /* enable irqs */
7311         cik_enable_interrupts(rdev);
7312
7313         return ret;
7314 }
7315
7316 /**
7317  * cik_irq_set - enable/disable interrupt sources
7318  *
7319  * @rdev: radeon_device pointer
7320  *
7321  * Enable interrupt sources on the GPU (vblanks, hpd,
7322  * etc.) (CIK).
7323  * Returns 0 for success, errors for failure.
7324  */
7325 int cik_irq_set(struct radeon_device *rdev)
7326 {
7327         u32 cp_int_cntl;
7328         u32 cp_m1p0;
7329         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7330         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7331         u32 grbm_int_cntl = 0;
7332         u32 dma_cntl, dma_cntl1;
7333         u32 thermal_int;
7334
7335         if (!rdev->irq.installed) {
7336                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7337                 return -EINVAL;
7338         }
7339         /* don't enable anything if the ih is disabled */
7340         if (!rdev->ih.enabled) {
7341                 cik_disable_interrupts(rdev);
7342                 /* force the active interrupt state to all disabled */
7343                 cik_disable_interrupt_state(rdev);
7344                 return 0;
7345         }
7346
7347         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7348                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7349         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7350
7351         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7352         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7353         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7354         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7355         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7356         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7357
7358         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7359         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7360
7361         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7362
7363         if (rdev->flags & RADEON_IS_IGP)
7364                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7365                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
7366         else
7367                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7368                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7369
7370         /* enable CP interrupts on all rings */
7371         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7372                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7373                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7374         }
7375         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7376                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7377                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
7378                 if (ring->me == 1) {
7379                         switch (ring->pipe) {
7380                         case 0:
7381                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7382                                 break;
7383                         default:
7384                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7385                                 break;
7386                         }
7387                 } else {
7388                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7389                 }
7390         }
7391         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7392                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7393                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
7394                 if (ring->me == 1) {
7395                         switch (ring->pipe) {
7396                         case 0:
7397                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7398                                 break;
7399                         default:
7400                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7401                                 break;
7402                         }
7403                 } else {
7404                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7405                 }
7406         }
7407
7408         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7409                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7410                 dma_cntl |= TRAP_ENABLE;
7411         }
7412
7413         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7414                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7415                 dma_cntl1 |= TRAP_ENABLE;
7416         }
7417
7418         if (rdev->irq.crtc_vblank_int[0] ||
7419             atomic_read(&rdev->irq.pflip[0])) {
7420                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7421                 crtc1 |= VBLANK_INTERRUPT_MASK;
7422         }
7423         if (rdev->irq.crtc_vblank_int[1] ||
7424             atomic_read(&rdev->irq.pflip[1])) {
7425                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7426                 crtc2 |= VBLANK_INTERRUPT_MASK;
7427         }
7428         if (rdev->irq.crtc_vblank_int[2] ||
7429             atomic_read(&rdev->irq.pflip[2])) {
7430                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7431                 crtc3 |= VBLANK_INTERRUPT_MASK;
7432         }
7433         if (rdev->irq.crtc_vblank_int[3] ||
7434             atomic_read(&rdev->irq.pflip[3])) {
7435                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7436                 crtc4 |= VBLANK_INTERRUPT_MASK;
7437         }
7438         if (rdev->irq.crtc_vblank_int[4] ||
7439             atomic_read(&rdev->irq.pflip[4])) {
7440                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7441                 crtc5 |= VBLANK_INTERRUPT_MASK;
7442         }
7443         if (rdev->irq.crtc_vblank_int[5] ||
7444             atomic_read(&rdev->irq.pflip[5])) {
7445                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7446                 crtc6 |= VBLANK_INTERRUPT_MASK;
7447         }
7448         if (rdev->irq.hpd[0]) {
7449                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7450                 hpd1 |= DC_HPDx_INT_EN;
7451         }
7452         if (rdev->irq.hpd[1]) {
7453                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7454                 hpd2 |= DC_HPDx_INT_EN;
7455         }
7456         if (rdev->irq.hpd[2]) {
7457                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7458                 hpd3 |= DC_HPDx_INT_EN;
7459         }
7460         if (rdev->irq.hpd[3]) {
7461                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7462                 hpd4 |= DC_HPDx_INT_EN;
7463         }
7464         if (rdev->irq.hpd[4]) {
7465                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7466                 hpd5 |= DC_HPDx_INT_EN;
7467         }
7468         if (rdev->irq.hpd[5]) {
7469                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7470                 hpd6 |= DC_HPDx_INT_EN;
7471         }
7472
7473         if (rdev->irq.dpm_thermal) {
7474                 DRM_DEBUG("dpm thermal\n");
7475                 if (rdev->flags & RADEON_IS_IGP)
7476                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7477                 else
7478                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7479         }
7480
7481         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7482
7483         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7484         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7485
7486         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7487
7488         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7489
7490         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7491         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7492         if (rdev->num_crtc >= 4) {
7493                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7494                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7495         }
7496         if (rdev->num_crtc >= 6) {
7497                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7498                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7499         }
7500
7501         if (rdev->num_crtc >= 2) {
7502                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7503                        GRPH_PFLIP_INT_MASK);
7504                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7505                        GRPH_PFLIP_INT_MASK);
7506         }
7507         if (rdev->num_crtc >= 4) {
7508                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7509                        GRPH_PFLIP_INT_MASK);
7510                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7511                        GRPH_PFLIP_INT_MASK);
7512         }
7513         if (rdev->num_crtc >= 6) {
7514                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7515                        GRPH_PFLIP_INT_MASK);
7516                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7517                        GRPH_PFLIP_INT_MASK);
7518         }
7519
7520         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7521         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7522         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7523         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7524         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7525         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7526
7527         if (rdev->flags & RADEON_IS_IGP)
7528                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7529         else
7530                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7531
7532         return 0;
7533 }
7534
7535 /**
7536  * cik_irq_ack - ack interrupt sources
7537  *
7538  * @rdev: radeon_device pointer
7539  *
7540  * Ack interrupt sources on the GPU (vblanks, hpd,
7541  * etc.) (CIK).  Certain interrupt sources are sw
7542  * generated and do not require an explicit ack.
7543  */
7544 static inline void cik_irq_ack(struct radeon_device *rdev)
7545 {
7546         u32 tmp;
7547
7548         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7549         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7550         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7551         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7552         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7553         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7554         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7555
7556         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7557                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7558         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7559                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7560         if (rdev->num_crtc >= 4) {
7561                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7562                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7563                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7564                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7565         }
7566         if (rdev->num_crtc >= 6) {
7567                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7568                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7569                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7570                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7571         }
7572
7573         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7574                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7575                        GRPH_PFLIP_INT_CLEAR);
7576         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7577                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7578                        GRPH_PFLIP_INT_CLEAR);
7579         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7580                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7581         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7582                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7583         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7584                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7585         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7586                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7587
7588         if (rdev->num_crtc >= 4) {
7589                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7590                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7591                                GRPH_PFLIP_INT_CLEAR);
7592                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7593                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7594                                GRPH_PFLIP_INT_CLEAR);
7595                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7596                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7597                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7598                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7599                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7600                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7601                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7602                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7603         }
7604
7605         if (rdev->num_crtc >= 6) {
7606                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7607                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7608                                GRPH_PFLIP_INT_CLEAR);
7609                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7610                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7611                                GRPH_PFLIP_INT_CLEAR);
7612                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7613                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7614                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7615                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7616                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7617                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7618                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7619                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7620         }
7621
7622         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7623                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7624                 tmp |= DC_HPDx_INT_ACK;
7625                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7626         }
7627         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7628                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7629                 tmp |= DC_HPDx_INT_ACK;
7630                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7631         }
7632         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7633                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7634                 tmp |= DC_HPDx_INT_ACK;
7635                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7636         }
7637         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7638                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7639                 tmp |= DC_HPDx_INT_ACK;
7640                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7641         }
7642         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7643                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7644                 tmp |= DC_HPDx_INT_ACK;
7645                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7646         }
7647         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7648                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7649                 tmp |= DC_HPDx_INT_ACK;
7650                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7651         }
7652 }
7653
7654 /**
7655  * cik_irq_disable - disable interrupts
7656  *
7657  * @rdev: radeon_device pointer
7658  *
7659  * Disable interrupts on the hw (CIK).
7660  */
7661 static void cik_irq_disable(struct radeon_device *rdev)
7662 {
7663         cik_disable_interrupts(rdev);
7664         /* Wait and acknowledge irq */
7665         mdelay(1);
7666         cik_irq_ack(rdev);
7667         cik_disable_interrupt_state(rdev);
7668 }
7669
7670 /**
7671  * cik_irq_suspend - disable interrupts for suspend
7672  *
7673  * @rdev: radeon_device pointer
7674  *
7675  * Disable interrupts and stop the RLC (CIK).
7676  * Used for suspend.
7677  */
7678 static void cik_irq_suspend(struct radeon_device *rdev)
7679 {
7680         cik_irq_disable(rdev);
7681         cik_rlc_stop(rdev);
7682 }
7683
7684 /**
7685  * cik_irq_fini - tear down interrupt support
7686  *
7687  * @rdev: radeon_device pointer
7688  *
7689  * Disable interrupts on the hw and free the IH ring
7690  * buffer (CIK).
7691  * Used for driver unload.
7692  */
7693 static void cik_irq_fini(struct radeon_device *rdev)
7694 {
7695         cik_irq_suspend(rdev);
7696         r600_ih_ring_fini(rdev);
7697 }
7698
7699 /**
7700  * cik_get_ih_wptr - get the IH ring buffer wptr
7701  *
7702  * @rdev: radeon_device pointer
7703  *
7704  * Get the IH ring buffer wptr from either the register
7705  * or the writeback memory buffer (CIK).  Also check for
7706  * ring buffer overflow and deal with it.
7707  * Used by cik_irq_process().
7708  * Returns the value of the wptr.
7709  */
7710 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7711 {
7712         u32 wptr, tmp;
7713
7714         if (rdev->wb.enabled)
7715                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7716         else
7717                 wptr = RREG32(IH_RB_WPTR);
7718
7719         if (wptr & RB_OVERFLOW) {
7720                 wptr &= ~RB_OVERFLOW;
7721                 /* When a ring buffer overflow happens, start parsing interrupts
7722                  * from the last vector that was not overwritten (wptr + 16).
7723                  * Hopefully this should allow us to catch up.
7724                  */
7725                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7726                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7727                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7728                 tmp = RREG32(IH_RB_CNTL);
7729                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7730                 WREG32(IH_RB_CNTL, tmp);
7731         }
7732         return (wptr & rdev->ih.ptr_mask);
7733 }
7734
7735 /*        CIK IV Ring
7736  * Each IV ring entry is 128 bits:
7737  * [7:0]    - interrupt source id
7738  * [31:8]   - reserved
7739  * [59:32]  - interrupt source data
7740  * [63:60]  - reserved
7741  * [71:64]  - RINGID
7742  *            CP:
7743  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7744  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7745  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7746  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7747  *            PIPE_ID - ME0 0=3D
7748  *                    - ME1&2 compute dispatcher (4 pipes each)
7749  *            SDMA:
7750  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7751  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7752  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7753  * [79:72]  - VMID
7754  * [95:80]  - PASID
7755  * [127:96] - reserved
7756  */
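/* For illustration: cik_irq_process() below reads each 16-byte entry as four
 * little-endian dwords and masks out the fields it needs:
 *   src_id   = dw0 & 0xff;        (bits [7:0])
 *   src_data = dw1 & 0xfffffff;   (bits [59:32], i.e. the low 28 bits of dw1)
 *   ring_id  = dw2 & 0xff;        (bits [71:64], i.e. the low 8 bits of dw2)
 * VMID ([79:72]) and PASID ([95:80]) sit in the upper bits of dw2 and are not
 * decoded by this handler.
 */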
7757 /**
7758  * cik_irq_process - interrupt handler
7759  *
7760  * @rdev: radeon_device pointer
7761  *
7762  * Interrupt handler (CIK).  Walk the IH ring,
7763  * ack interrupts and schedule work to handle
7764  * interrupt events.
7765  * Returns irq process return code.
7766  */
7767 int cik_irq_process(struct radeon_device *rdev)
7768 {
7769         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7770         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7771         u32 wptr;
7772         u32 rptr;
7773         u32 src_id, src_data, ring_id;
7774         u8 me_id, pipe_id, queue_id;
7775         u32 ring_index;
7776         bool queue_hotplug = false;
7777         bool queue_reset = false;
7778         u32 addr, status, mc_client;
7779         bool queue_thermal = false;
7780
7781         if (!rdev->ih.enabled || rdev->shutdown)
7782                 return IRQ_NONE;
7783
7784         wptr = cik_get_ih_wptr(rdev);
7785
7786 restart_ih:
7787         /* is somebody else already processing irqs? */
7788         if (atomic_xchg(&rdev->ih.lock, 1))
7789                 return IRQ_NONE;
7790
7791         rptr = rdev->ih.rptr;
7792         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7793
7794         /* Order reading of wptr vs. reading of IH ring data */
7795         rmb();
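        /* wptr may have been fetched from the write-back buffer above; the
         * read barrier orders that wptr read before the IH ring reads below,
         * so every entry we parse up to wptr is actually visible to us.
         */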
7796
7797         /* display interrupts */
7798         cik_irq_ack(rdev);
7799
7800         while (rptr != wptr) {
7801                 /* wptr/rptr are in bytes! */
7802                 ring_index = rptr / 4;
7803
7804                 radeon_kfd_interrupt(rdev,
7805                                 (const void *) &rdev->ih.ring[ring_index]);
7806
7807                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7808                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7809                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7810
7811                 switch (src_id) {
7812                 case 1: /* D1 vblank/vline */
7813                         switch (src_data) {
7814                         case 0: /* D1 vblank */
7815                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7816                                         if (rdev->irq.crtc_vblank_int[0]) {
7817                                                 drm_handle_vblank(rdev->ddev, 0);
7818                                                 rdev->pm.vblank_sync = true;
7819                                                 wake_up(&rdev->irq.vblank_queue);
7820                                         }
7821                                         if (atomic_read(&rdev->irq.pflip[0]))
7822                                                 radeon_crtc_handle_vblank(rdev, 0);
7823                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7824                                         DRM_DEBUG("IH: D1 vblank\n");
7825                                 }
7826                                 break;
7827                         case 1: /* D1 vline */
7828                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7829                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7830                                         DRM_DEBUG("IH: D1 vline\n");
7831                                 }
7832                                 break;
7833                         default:
7834                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7835                                 break;
7836                         }
7837                         break;
7838                 case 2: /* D2 vblank/vline */
7839                         switch (src_data) {
7840                         case 0: /* D2 vblank */
7841                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7842                                         if (rdev->irq.crtc_vblank_int[1]) {
7843                                                 drm_handle_vblank(rdev->ddev, 1);
7844                                                 rdev->pm.vblank_sync = true;
7845                                                 wake_up(&rdev->irq.vblank_queue);
7846                                         }
7847                                         if (atomic_read(&rdev->irq.pflip[1]))
7848                                                 radeon_crtc_handle_vblank(rdev, 1);
7849                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7850                                         DRM_DEBUG("IH: D2 vblank\n");
7851                                 }
7852                                 break;
7853                         case 1: /* D2 vline */
7854                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7855                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7856                                         DRM_DEBUG("IH: D2 vline\n");
7857                                 }
7858                                 break;
7859                         default:
7860                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7861                                 break;
7862                         }
7863                         break;
7864                 case 3: /* D3 vblank/vline */
7865                         switch (src_data) {
7866                         case 0: /* D3 vblank */
7867                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7868                                         if (rdev->irq.crtc_vblank_int[2]) {
7869                                                 drm_handle_vblank(rdev->ddev, 2);
7870                                                 rdev->pm.vblank_sync = true;
7871                                                 wake_up(&rdev->irq.vblank_queue);
7872                                         }
7873                                         if (atomic_read(&rdev->irq.pflip[2]))
7874                                                 radeon_crtc_handle_vblank(rdev, 2);
7875                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7876                                         DRM_DEBUG("IH: D3 vblank\n");
7877                                 }
7878                                 break;
7879                         case 1: /* D3 vline */
7880                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7881                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7882                                         DRM_DEBUG("IH: D3 vline\n");
7883                                 }
7884                                 break;
7885                         default:
7886                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7887                                 break;
7888                         }
7889                         break;
7890                 case 4: /* D4 vblank/vline */
7891                         switch (src_data) {
7892                         case 0: /* D4 vblank */
7893                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7894                                         if (rdev->irq.crtc_vblank_int[3]) {
7895                                                 drm_handle_vblank(rdev->ddev, 3);
7896                                                 rdev->pm.vblank_sync = true;
7897                                                 wake_up(&rdev->irq.vblank_queue);
7898                                         }
7899                                         if (atomic_read(&rdev->irq.pflip[3]))
7900                                                 radeon_crtc_handle_vblank(rdev, 3);
7901                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7902                                         DRM_DEBUG("IH: D4 vblank\n");
7903                                 }
7904                                 break;
7905                         case 1: /* D4 vline */
7906                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7907                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7908                                         DRM_DEBUG("IH: D4 vline\n");
7909                                 }
7910                                 break;
7911                         default:
7912                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7913                                 break;
7914                         }
7915                         break;
7916                 case 5: /* D5 vblank/vline */
7917                         switch (src_data) {
7918                         case 0: /* D5 vblank */
7919                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7920                                         if (rdev->irq.crtc_vblank_int[4]) {
7921                                                 drm_handle_vblank(rdev->ddev, 4);
7922                                                 rdev->pm.vblank_sync = true;
7923                                                 wake_up(&rdev->irq.vblank_queue);
7924                                         }
7925                                         if (atomic_read(&rdev->irq.pflip[4]))
7926                                                 radeon_crtc_handle_vblank(rdev, 4);
7927                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7928                                         DRM_DEBUG("IH: D5 vblank\n");
7929                                 }
7930                                 break;
7931                         case 1: /* D5 vline */
7932                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7933                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7934                                         DRM_DEBUG("IH: D5 vline\n");
7935                                 }
7936                                 break;
7937                         default:
7938                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7939                                 break;
7940                         }
7941                         break;
7942                 case 6: /* D6 vblank/vline */
7943                         switch (src_data) {
7944                         case 0: /* D6 vblank */
7945                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7946                                         if (rdev->irq.crtc_vblank_int[5]) {
7947                                                 drm_handle_vblank(rdev->ddev, 5);
7948                                                 rdev->pm.vblank_sync = true;
7949                                                 wake_up(&rdev->irq.vblank_queue);
7950                                         }
7951                                         if (atomic_read(&rdev->irq.pflip[5]))
7952                                                 radeon_crtc_handle_vblank(rdev, 5);
7953                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7954                                         DRM_DEBUG("IH: D6 vblank\n");
7955                                 }
7956                                 break;
7957                         case 1: /* D6 vline */
7958                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7959                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7960                                         DRM_DEBUG("IH: D6 vline\n");
7961                                 }
7962                                 break;
7963                         default:
7964                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7965                                 break;
7966                         }
7967                         break;
7968                 case 8: /* D1 page flip */
7969                 case 10: /* D2 page flip */
7970                 case 12: /* D3 page flip */
7971                 case 14: /* D4 page flip */
7972                 case 16: /* D5 page flip */
7973                 case 18: /* D6 page flip */
7974                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7975                         if (radeon_use_pflipirq > 0)
7976                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7977                         break;
7978                 case 42: /* HPD hotplug */
7979                         switch (src_data) {
7980                         case 0:
7981                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7982                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7983                                         queue_hotplug = true;
7984                                         DRM_DEBUG("IH: HPD1\n");
7985                                 }
7986                                 break;
7987                         case 1:
7988                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7989                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7990                                         queue_hotplug = true;
7991                                         DRM_DEBUG("IH: HPD2\n");
7992                                 }
7993                                 break;
7994                         case 2:
7995                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7996                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7997                                         queue_hotplug = true;
7998                                         DRM_DEBUG("IH: HPD3\n");
7999                                 }
8000                                 break;
8001                         case 3:
8002                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8003                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8004                                         queue_hotplug = true;
8005                                         DRM_DEBUG("IH: HPD4\n");
8006                                 }
8007                                 break;
8008                         case 4:
8009                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8010                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8011                                         queue_hotplug = true;
8012                                         DRM_DEBUG("IH: HPD5\n");
8013                                 }
8014                                 break;
8015                         case 5:
8016                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8017                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8018                                         queue_hotplug = true;
8019                                         DRM_DEBUG("IH: HPD6\n");
8020                                 }
8021                                 break;
8022                         default:
8023                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8024                                 break;
8025                         }
8026                         break;
8027                 case 124: /* UVD */
8028                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8029                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8030                         break;
8031                 case 146:
8032                 case 147:
8033                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8034                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8035                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8036                         /* reset addr and status */
8037                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8038                         if (addr == 0x0 && status == 0x0)
8039                                 break;
8040                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8041                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8042                                 addr);
8043                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8044                                 status);
8045                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8046                         break;
8047                 case 167: /* VCE */
8048                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8049                         switch (src_data) {
8050                         case 0:
8051                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8052                                 break;
8053                         case 1:
8054                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8055                                 break;
8056                         default:
8057                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8058                                 break;
8059                         }
8060                         break;
8061                 case 176: /* GFX RB CP_INT */
8062                 case 177: /* GFX IB CP_INT */
8063                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8064                         break;
8065                 case 181: /* CP EOP event */
8066                         DRM_DEBUG("IH: CP EOP\n");
8067                         /* XXX check the bitfield order! */
8068                         me_id = (ring_id & 0x60) >> 5;
8069                         pipe_id = (ring_id & 0x18) >> 3;
8070                         queue_id = (ring_id & 0x7) >> 0;
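                        /* The masks above assume the RINGID byte is laid out as
                         * ME_ID in bits [6:5], PIPE_ID in bits [4:3] and
                         * QUEUE_ID in bits [2:0], matching the CP entry format
                         * in the IV ring comment above (hence the "check the
                         * bitfield order" note).
                         */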
8071                         switch (me_id) {
8072                         case 0:
8073                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8074                                 break;
8075                         case 1:
8076                         case 2:
8077                         if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8078                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8079                         if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8080                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8081                                 break;
8082                         }
8083                         break;
8084                 case 184: /* CP Privileged reg access */
8085                         DRM_ERROR("Illegal register access in command stream\n");
8086                         /* XXX check the bitfield order! */
8087                         me_id = (ring_id & 0x60) >> 5;
8088                         pipe_id = (ring_id & 0x18) >> 3;
8089                         queue_id = (ring_id & 0x7) >> 0;
8090                         switch (me_id) {
8091                         case 0:
8092                                 /* This results in a full GPU reset, but all we need to do is soft
8093                                  * reset the CP for gfx
8094                                  */
8095                                 queue_reset = true;
8096                                 break;
8097                         case 1:
8098                                 /* XXX compute */
8099                                 queue_reset = true;
8100                                 break;
8101                         case 2:
8102                                 /* XXX compute */
8103                                 queue_reset = true;
8104                                 break;
8105                         }
8106                         break;
8107                 case 185: /* CP Privileged inst */
8108                         DRM_ERROR("Illegal instruction in command stream\n");
8109                         /* XXX check the bitfield order! */
8110                         me_id = (ring_id & 0x60) >> 5;
8111                         pipe_id = (ring_id & 0x18) >> 3;
8112                         queue_id = (ring_id & 0x7) >> 0;
8113                         switch (me_id) {
8114                         case 0:
8115                                 /* This results in a full GPU reset, but all we need to do is soft
8116                                  * reset the CP for gfx
8117                                  */
8118                                 queue_reset = true;
8119                                 break;
8120                         case 1:
8121                                 /* XXX compute */
8122                                 queue_reset = true;
8123                                 break;
8124                         case 2:
8125                                 /* XXX compute */
8126                                 queue_reset = true;
8127                                 break;
8128                         }
8129                         break;
8130                 case 224: /* SDMA trap event */
8131                         /* XXX check the bitfield order! */
8132                         me_id = (ring_id & 0x3) >> 0;
8133                         queue_id = (ring_id & 0xc) >> 2;
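                        /* For SDMA entries the code assumes INSTANCE_ID in bits
                         * [1:0] and QUEUE_ID in bits [3:2] of the RINGID byte,
                         * so me_id here is really the SDMA instance.
                         */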
8134                         DRM_DEBUG("IH: SDMA trap\n");
8135                         switch (me_id) {
8136                         case 0:
8137                                 switch (queue_id) {
8138                                 case 0:
8139                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8140                                         break;
8141                                 case 1:
8142                                         /* XXX compute */
8143                                         break;
8144                                 case 2:
8145                                         /* XXX compute */
8146                                         break;
8147                                 }
8148                                 break;
8149                         case 1:
8150                                 switch (queue_id) {
8151                                 case 0:
8152                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8153                                         break;
8154                                 case 1:
8155                                         /* XXX compute */
8156                                         break;
8157                                 case 2:
8158                                         /* XXX compute */
8159                                         break;
8160                                 }
8161                                 break;
8162                         }
8163                         break;
8164                 case 230: /* thermal low to high */
8165                         DRM_DEBUG("IH: thermal low to high\n");
8166                         rdev->pm.dpm.thermal.high_to_low = false;
8167                         queue_thermal = true;
8168                         break;
8169                 case 231: /* thermal high to low */
8170                         DRM_DEBUG("IH: thermal high to low\n");
8171                         rdev->pm.dpm.thermal.high_to_low = true;
8172                         queue_thermal = true;
8173                         break;
8174                 case 233: /* GUI IDLE */
8175                         DRM_DEBUG("IH: GUI idle\n");
8176                         break;
8177                 case 241: /* SDMA Privileged inst */
8178                 case 247: /* SDMA Privileged inst */
8179                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8180                         /* XXX check the bitfield order! */
8181                         me_id = (ring_id & 0x3) >> 0;
8182                         queue_id = (ring_id & 0xc) >> 2;
8183                         switch (me_id) {
8184                         case 0:
8185                                 switch (queue_id) {
8186                                 case 0:
8187                                         queue_reset = true;
8188                                         break;
8189                                 case 1:
8190                                         /* XXX compute */
8191                                         queue_reset = true;
8192                                         break;
8193                                 case 2:
8194                                         /* XXX compute */
8195                                         queue_reset = true;
8196                                         break;
8197                                 }
8198                                 break;
8199                         case 1:
8200                                 switch (queue_id) {
8201                                 case 0:
8202                                         queue_reset = true;
8203                                         break;
8204                                 case 1:
8205                                         /* XXX compute */
8206                                         queue_reset = true;
8207                                         break;
8208                                 case 2:
8209                                         /* XXX compute */
8210                                         queue_reset = true;
8211                                         break;
8212                                 }
8213                                 break;
8214                         }
8215                         break;
8216                 default:
8217                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8218                         break;
8219                 }
8220
8221                 /* wptr/rptr are in bytes! */
8222                 rptr += 16;
8223                 rptr &= rdev->ih.ptr_mask;
8224                 WREG32(IH_RB_RPTR, rptr);
8225         }
8226         if (queue_hotplug)
8227                 schedule_work(&rdev->hotplug_work);
8228         if (queue_reset) {
8229                 rdev->needs_reset = true;
8230                 wake_up_all(&rdev->fence_queue);
8231         }
8232         if (queue_thermal)
8233                 schedule_work(&rdev->pm.dpm.thermal.work);
8234         rdev->ih.rptr = rptr;
8235         atomic_set(&rdev->ih.lock, 0);
8236
8237         /* make sure wptr hasn't changed while processing */
8238         wptr = cik_get_ih_wptr(rdev);
8239         if (wptr != rptr)
8240                 goto restart_ih;
8241
8242         return IRQ_HANDLED;
8243 }
8244
8245 /*
8246  * startup/shutdown callbacks
8247  */
8248 /**
8249  * cik_startup - program the asic to a functional state
8250  *
8251  * @rdev: radeon_device pointer
8252  *
8253  * Programs the asic to a functional state (CIK).
8254  * Called by cik_init() and cik_resume().
8255  * Returns 0 for success, error for failure.
8256  */
8257 static int cik_startup(struct radeon_device *rdev)
8258 {
8259         struct radeon_ring *ring;
8260         u32 nop;
8261         int r;
8262
8263         /* enable pcie gen2/3 link */
8264         cik_pcie_gen3_enable(rdev);
8265         /* enable aspm */
8266         cik_program_aspm(rdev);
8267
8268         /* scratch needs to be initialized before MC */
8269         r = r600_vram_scratch_init(rdev);
8270         if (r)
8271                 return r;
8272
8273         cik_mc_program(rdev);
8274
8275         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8276                 r = ci_mc_load_microcode(rdev);
8277                 if (r) {
8278                         DRM_ERROR("Failed to load MC firmware!\n");
8279                         return r;
8280                 }
8281         }
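        /* Presumably the DPM code takes care of loading the MC firmware on
         * dGPUs when dpm is enabled, and APUs have no separate MC firmware,
         * so only this combination needs an explicit load here.
         */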
8282
8283         r = cik_pcie_gart_enable(rdev);
8284         if (r)
8285                 return r;
8286         cik_gpu_init(rdev);
8287
8288         /* allocate rlc buffers */
8289         if (rdev->flags & RADEON_IS_IGP) {
8290                 if (rdev->family == CHIP_KAVERI) {
8291                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8292                         rdev->rlc.reg_list_size =
8293                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8294                 } else {
8295                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8296                         rdev->rlc.reg_list_size =
8297                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8298                 }
8299         }
8300         rdev->rlc.cs_data = ci_cs_data;
8301         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8302         r = sumo_rlc_init(rdev);
8303         if (r) {
8304                 DRM_ERROR("Failed to init rlc BOs!\n");
8305                 return r;
8306         }
8307
8308         /* allocate wb buffer */
8309         r = radeon_wb_init(rdev);
8310         if (r)
8311                 return r;
8312
8313         /* allocate mec buffers */
8314         r = cik_mec_init(rdev);
8315         if (r) {
8316                 DRM_ERROR("Failed to init MEC BOs!\n");
8317                 return r;
8318         }
8319
8320         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8321         if (r) {
8322                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8323                 return r;
8324         }
8325
8326         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8327         if (r) {
8328                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8329                 return r;
8330         }
8331
8332         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8333         if (r) {
8334                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8335                 return r;
8336         }
8337
8338         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8339         if (r) {
8340                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8341                 return r;
8342         }
8343
8344         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8345         if (r) {
8346                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8347                 return r;
8348         }
8349
8350         r = radeon_uvd_resume(rdev);
8351         if (!r) {
8352                 r = uvd_v4_2_resume(rdev);
8353                 if (!r) {
8354                         r = radeon_fence_driver_start_ring(rdev,
8355                                                            R600_RING_TYPE_UVD_INDEX);
8356                         if (r)
8357                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8358                 }
8359         }
8360         if (r)
8361                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8362
8363         r = radeon_vce_resume(rdev);
8364         if (!r) {
8365                 r = vce_v2_0_resume(rdev);
8366                 if (!r)
8367                         r = radeon_fence_driver_start_ring(rdev,
8368                                                            TN_RING_TYPE_VCE1_INDEX);
8369                 if (!r)
8370                         r = radeon_fence_driver_start_ring(rdev,
8371                                                            TN_RING_TYPE_VCE2_INDEX);
8372         }
8373         if (r) {
8374                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8375                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8376                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8377         }
8378
8379         /* Enable IRQ */
8380         if (!rdev->irq.installed) {
8381                 r = radeon_irq_kms_init(rdev);
8382                 if (r)
8383                         return r;
8384         }
8385
8386         r = cik_irq_init(rdev);
8387         if (r) {
8388                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8389                 radeon_irq_kms_fini(rdev);
8390                 return r;
8391         }
8392         cik_irq_set(rdev);
8393
8394         if (rdev->family == CHIP_HAWAII) {
8395                 if (rdev->new_fw)
8396                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8397                 else
8398                         nop = RADEON_CP_PACKET2;
8399         } else {
8400                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8401         }
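        /* Presumably the older (non-new_fw) HAWAII CP microcode does not cope
         * with type-3 NOP packets on the gfx ring, so fall back to the legacy
         * type-2 packet there; everything else pads with type-3 NOPs.
         */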
8402
8403         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8404         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8405                              nop);
8406         if (r)
8407                 return r;
8408
8409         /* set up the compute queues */
8410         /* type-2 packets are deprecated on MEC, use type-3 instead */
8411         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8412         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8413                              nop);
8414         if (r)
8415                 return r;
8416         ring->me = 1; /* first MEC */
8417         ring->pipe = 0; /* first pipe */
8418         ring->queue = 0; /* first queue */
8419         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8420
8421         /* type-2 packets are deprecated on MEC, use type-3 instead */
8422         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8423         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8424                              nop);
8425         if (r)
8426                 return r;
8427         /* dGPUs only have 1 MEC */
8428         ring->me = 1; /* first MEC */
8429         ring->pipe = 0; /* first pipe */
8430         ring->queue = 1; /* second queue */
8431         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8432
8433         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8434         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8435                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436         if (r)
8437                 return r;
8438
8439         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8440         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8441                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8442         if (r)
8443                 return r;
8444
8445         r = cik_cp_resume(rdev);
8446         if (r)
8447                 return r;
8448
8449         r = cik_sdma_resume(rdev);
8450         if (r)
8451                 return r;
8452
8453         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8454         if (ring->ring_size) {
8455                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8456                                      RADEON_CP_PACKET2);
8457                 if (!r)
8458                         r = uvd_v1_0_init(rdev);
8459                 if (r)
8460                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8461         }
8462
8463         r = -ENOENT;
8464
8465         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8466         if (ring->ring_size)
8467                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8468                                      VCE_CMD_NO_OP);
8469
8470         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8471         if (ring->ring_size)
8472                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8473                                      VCE_CMD_NO_OP);
8474
8475         if (!r)
8476                 r = vce_v1_0_init(rdev);
8477         else if (r != -ENOENT)
8478                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8479
8480         r = radeon_ib_pool_init(rdev);
8481         if (r) {
8482                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8483                 return r;
8484         }
8485
8486         r = radeon_vm_manager_init(rdev);
8487         if (r) {
8488                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8489                 return r;
8490         }
8491
8492         r = dce6_audio_init(rdev);
8493         if (r)
8494                 return r;
8495
8496         r = radeon_kfd_resume(rdev);
8497         if (r)
8498                 return r;
8499
8500         return 0;
8501 }
8502
8503 /**
8504  * cik_resume - resume the asic to a functional state
8505  *
8506  * @rdev: radeon_device pointer
8507  *
8508  * Programs the asic to a functional state (CIK).
8509  * Called at resume.
8510  * Returns 0 for success, error for failure.
8511  */
8512 int cik_resume(struct radeon_device *rdev)
8513 {
8514         int r;
8515
8516         /* post card */
8517         atom_asic_init(rdev->mode_info.atom_context);
8518
8519         /* init golden registers */
8520         cik_init_golden_registers(rdev);
8521
8522         if (rdev->pm.pm_method == PM_METHOD_DPM)
8523                 radeon_pm_resume(rdev);
8524
8525         rdev->accel_working = true;
8526         r = cik_startup(rdev);
8527         if (r) {
8528                 DRM_ERROR("cik startup failed on resume\n");
8529                 rdev->accel_working = false;
8530                 return r;
8531         }
8532
8533         return r;
8534
8535 }
8536
8537 /**
8538  * cik_suspend - suspend the asic
8539  *
8540  * @rdev: radeon_device pointer
8541  *
8542  * Bring the chip into a state suitable for suspend (CIK).
8543  * Called at suspend.
8544  * Returns 0 for success.
8545  */
8546 int cik_suspend(struct radeon_device *rdev)
8547 {
8548         radeon_kfd_suspend(rdev);
8549         radeon_pm_suspend(rdev);
8550         dce6_audio_fini(rdev);
8551         radeon_vm_manager_fini(rdev);
8552         cik_cp_enable(rdev, false);
8553         cik_sdma_enable(rdev, false);
8554         uvd_v1_0_fini(rdev);
8555         radeon_uvd_suspend(rdev);
8556         radeon_vce_suspend(rdev);
8557         cik_fini_pg(rdev);
8558         cik_fini_cg(rdev);
8559         cik_irq_suspend(rdev);
8560         radeon_wb_disable(rdev);
8561         cik_pcie_gart_disable(rdev);
8562         return 0;
8563 }
8564
8565 /* The plan is to move initialization into this function and use
8566  * helper functions so that radeon_device_init does little more
8567  * than call asic-specific functions. This should also allow us to
8568  * remove a bunch of callback functions like vram_info.
8570  */
8571 /**
8572  * cik_init - asic specific driver and hw init
8573  *
8574  * @rdev: radeon_device pointer
8575  *
8576  * Setup asic specific driver variables and program the hw
8577  * to a functional state (CIK).
8578  * Called at driver startup.
8579  * Returns 0 for success, errors for failure.
8580  */
8581 int cik_init(struct radeon_device *rdev)
8582 {
8583         struct radeon_ring *ring;
8584         int r;
8585
8586         /* Read BIOS */
8587         if (!radeon_get_bios(rdev)) {
8588                 if (ASIC_IS_AVIVO(rdev))
8589                         return -EINVAL;
8590         }
8591         /* Must be an ATOMBIOS */
8592         if (!rdev->is_atom_bios) {
8593                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8594                 return -EINVAL;
8595         }
8596         r = radeon_atombios_init(rdev);
8597         if (r)
8598                 return r;
8599
8600         /* Post card if necessary */
8601         if (!radeon_card_posted(rdev)) {
8602                 if (!rdev->bios) {
8603                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8604                         return -EINVAL;
8605                 }
8606                 DRM_INFO("GPU not posted. posting now...\n");
8607                 atom_asic_init(rdev->mode_info.atom_context);
8608         }
8609         /* init golden registers */
8610         cik_init_golden_registers(rdev);
8611         /* Initialize scratch registers */
8612         cik_scratch_init(rdev);
8613         /* Initialize surface registers */
8614         radeon_surface_init(rdev);
8615         /* Initialize clocks */
8616         radeon_get_clock_info(rdev->ddev);
8617
8618         /* Fence driver */
8619         r = radeon_fence_driver_init(rdev);
8620         if (r)
8621                 return r;
8622
8623         /* initialize memory controller */
8624         r = cik_mc_init(rdev);
8625         if (r)
8626                 return r;
8627         /* Memory manager */
8628         r = radeon_bo_init(rdev);
8629         if (r)
8630                 return r;
8631
8632         if (rdev->flags & RADEON_IS_IGP) {
8633                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8634                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8635                         r = cik_init_microcode(rdev);
8636                         if (r) {
8637                                 DRM_ERROR("Failed to load firmware!\n");
8638                                 return r;
8639                         }
8640                 }
8641         } else {
8642                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8643                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8644                     !rdev->mc_fw) {
8645                         r = cik_init_microcode(rdev);
8646                         if (r) {
8647                                 DRM_ERROR("Failed to load firmware!\n");
8648                                 return r;
8649                         }
8650                 }
8651         }
8652
8653         /* Initialize power management */
8654         radeon_pm_init(rdev);
8655
8656         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8657         ring->ring_obj = NULL;
8658         r600_ring_init(rdev, ring, 1024 * 1024);
8659
8660         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8661         ring->ring_obj = NULL;
8662         r600_ring_init(rdev, ring, 1024 * 1024);
8663         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8664         if (r)
8665                 return r;
8666
8667         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8668         ring->ring_obj = NULL;
8669         r600_ring_init(rdev, ring, 1024 * 1024);
8670         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8671         if (r)
8672                 return r;
8673
8674         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8675         ring->ring_obj = NULL;
8676         r600_ring_init(rdev, ring, 256 * 1024);
8677
8678         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8679         ring->ring_obj = NULL;
8680         r600_ring_init(rdev, ring, 256 * 1024);
8681
8682         r = radeon_uvd_init(rdev);
8683         if (!r) {
8684                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8685                 ring->ring_obj = NULL;
8686                 r600_ring_init(rdev, ring, 4096);
8687         }
8688
8689         r = radeon_vce_init(rdev);
8690         if (!r) {
8691                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8692                 ring->ring_obj = NULL;
8693                 r600_ring_init(rdev, ring, 4096);
8694
8695                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8696                 ring->ring_obj = NULL;
8697                 r600_ring_init(rdev, ring, 4096);
8698         }
8699
8700         rdev->ih.ring_obj = NULL;
8701         r600_ih_ring_init(rdev, 64 * 1024);
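        /* 64 KiB of IH ring space, i.e. 4096 of the 16-byte IV entries
         * described above (the size here is in bytes).
         */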
8702
8703         r = r600_pcie_gart_init(rdev);
8704         if (r)
8705                 return r;
8706
8707         rdev->accel_working = true;
8708         r = cik_startup(rdev);
8709         if (r) {
8710                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8711                 cik_cp_fini(rdev);
8712                 cik_sdma_fini(rdev);
8713                 cik_irq_fini(rdev);
8714                 sumo_rlc_fini(rdev);
8715                 cik_mec_fini(rdev);
8716                 radeon_wb_fini(rdev);
8717                 radeon_ib_pool_fini(rdev);
8718                 radeon_vm_manager_fini(rdev);
8719                 radeon_irq_kms_fini(rdev);
8720                 cik_pcie_gart_fini(rdev);
8721                 rdev->accel_working = false;
8722         }
8723
8724         /* Don't start up if the MC ucode is missing.
8725          * The default clocks and voltages before the MC ucode
8726          * is loaded are not sufficient for advanced operations.
8727          */
8728         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8729                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8730                 return -EINVAL;
8731         }
8732
8733         return 0;
8734 }
8735
8736 /**
8737  * cik_fini - asic specific driver and hw fini
8738  *
8739  * @rdev: radeon_device pointer
8740  *
8741  * Tear down the asic specific driver variables and program the hw
8742  * to an idle state (CIK).
8743  * Called at driver unload.
8744  */
8745 void cik_fini(struct radeon_device *rdev)
8746 {
8747         radeon_pm_fini(rdev);
8748         cik_cp_fini(rdev);
8749         cik_sdma_fini(rdev);
8750         cik_fini_pg(rdev);
8751         cik_fini_cg(rdev);
8752         cik_irq_fini(rdev);
8753         sumo_rlc_fini(rdev);
8754         cik_mec_fini(rdev);
8755         radeon_wb_fini(rdev);
8756         radeon_vm_manager_fini(rdev);
8757         radeon_ib_pool_fini(rdev);
8758         radeon_irq_kms_fini(rdev);
8759         uvd_v1_0_fini(rdev);
8760         radeon_uvd_fini(rdev);
8761         radeon_vce_fini(rdev);
8762         cik_pcie_gart_fini(rdev);
8763         r600_vram_scratch_fini(rdev);
8764         radeon_gem_fini(rdev);
8765         radeon_fence_driver_fini(rdev);
8766         radeon_bo_fini(rdev);
8767         radeon_atombios_fini(rdev);
8768         kfree(rdev->bios);
8769         rdev->bios = NULL;
8770 }
8771
8772 void dce8_program_fmt(struct drm_encoder *encoder)
8773 {
8774         struct drm_device *dev = encoder->dev;
8775         struct radeon_device *rdev = dev->dev_private;
8776         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8777         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8778         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8779         int bpc = 0;
8780         u32 tmp = 0;
8781         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8782
8783         if (connector) {
8784                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8785                 bpc = radeon_get_monitor_bpc(connector);
8786                 dither = radeon_connector->dither;
8787         }
8788
8789         /* LVDS/eDP FMT is set up by atom */
8790         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8791                 return;
8792
8793         /* not needed for analog */
8794         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8795             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8796                 return;
8797
8798         if (bpc == 0)
8799                 return;
8800
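        /*
         * The switch below maps the panel depth to the FMT depth field:
         * 6 bpc uses depth 0, 8 bpc depth 1, 10 bpc depth 2.  When dithering
         * is requested, spatial/frame dithering is enabled at that depth;
         * otherwise the output is truncated to the panel depth.
         */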
8801         switch (bpc) {
8802         case 6:
8803                 if (dither == RADEON_FMT_DITHER_ENABLE)
8804                         /* XXX sort out optimal dither settings */
8805                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8806                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8807                 else
8808                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8809                 break;
8810         case 8:
8811                 if (dither == RADEON_FMT_DITHER_ENABLE)
8812                         /* XXX sort out optimal dither settings */
8813                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8814                                 FMT_RGB_RANDOM_ENABLE |
8815                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8816                 else
8817                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8818                 break;
8819         case 10:
8820                 if (dither == RADEON_FMT_DITHER_ENABLE)
8821                         /* XXX sort out optimal dither settings */
8822                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8823                                 FMT_RGB_RANDOM_ENABLE |
8824                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8825                 else
8826                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8827                 break;
8828         default:
8829                 /* not needed */
8830                 break;
8831         }
8832
8833         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8834 }
8835
8836 /* display watermark setup */
8837 /**
8838  * dce8_line_buffer_adjust - Set up the line buffer
8839  *
8840  * @rdev: radeon_device pointer
8841  * @radeon_crtc: the selected display controller
8842  * @mode: the current display mode on the selected display
8843  * controller
8844  *
8845  * Set up the line buffer allocation for
8846  * the selected display controller (CIK).
8847  * Returns the line buffer size in pixels.
8848  */
8849 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8850                                    struct radeon_crtc *radeon_crtc,
8851                                    struct drm_display_mode *mode)
8852 {
8853         u32 tmp, buffer_alloc, i;
8854         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8855         /*
8856          * Line Buffer Setup
8857          * There are 6 line buffers, one for each display controller.
8858          * There are 3 partitions per LB. Select the number of partitions
8859          * to enable based on the display width.  For display widths larger
8860          * than 4096, you need to use 2 display controllers and combine
8861          * them using the stereo blender.
8862          */
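        /*
         * Summary of the mapping below: LB config 1 is used for modes under
         * 1920 pixels wide, config 2 under 2560, and config 0 up to 4096
         * (also the fallback for anything wider).  Modes of 2560 pixels and
         * up get 4 DMIF buffers on dGPUs (2 on IGPs), narrower modes get 2.
         * The returned size is the width limit times two, presumably to
         * account for two lines of allocation.
         */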
8863         if (radeon_crtc->base.enabled && mode) {
8864                 if (mode->crtc_hdisplay < 1920) {
8865                         tmp = 1;
8866                         buffer_alloc = 2;
8867                 } else if (mode->crtc_hdisplay < 2560) {
8868                         tmp = 2;
8869                         buffer_alloc = 2;
8870                 } else if (mode->crtc_hdisplay < 4096) {
8871                         tmp = 0;
8872                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8873                 } else {
8874                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8875                         tmp = 0;
8876                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8877                 }
8878         } else {
8879                 tmp = 1;
8880                 buffer_alloc = 0;
8881         }
8882
8883         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8884                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8885
8886         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8887                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8888         for (i = 0; i < rdev->usec_timeout; i++) {
8889                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8890                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8891                         break;
8892                 udelay(1);
8893         }
8894
8895         if (radeon_crtc->base.enabled && mode) {
8896                 switch (tmp) {
8897                 case 0:
8898                 default:
8899                         return 4096 * 2;
8900                 case 1:
8901                         return 1920 * 2;
8902                 case 2:
8903                         return 2560 * 2;
8904                 }
8905         }
8906
8907         /* controller not enabled, so no lb used */
8908         return 0;
8909 }
8910
8911 /**
8912  * cik_get_number_of_dram_channels - get the number of dram channels
8913  *
8914  * @rdev: radeon_device pointer
8915  *
8916  * Look up the number of video ram channels (CIK).
8917  * Used for display watermark bandwidth calculations
8918  * Returns the number of dram channels
8919  */
8920 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8921 {
8922         u32 tmp = RREG32(MC_SHARED_CHMAP);
8923
8924         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8925         case 0:
8926         default:
8927                 return 1;
8928         case 1:
8929                 return 2;
8930         case 2:
8931                 return 4;
8932         case 3:
8933                 return 8;
8934         case 4:
8935                 return 3;
8936         case 5:
8937                 return 6;
8938         case 6:
8939                 return 10;
8940         case 7:
8941                 return 12;
8942         case 8:
8943                 return 16;
8944         }
8945 }
8946
8947 struct dce8_wm_params {
8948         u32 dram_channels; /* number of dram channels */
8949         u32 yclk;          /* bandwidth per dram data pin in kHz */
8950         u32 sclk;          /* engine clock in kHz */
8951         u32 disp_clk;      /* display clock in kHz */
8952         u32 src_width;     /* viewport width */
8953         u32 active_time;   /* active display time in ns */
8954         u32 blank_time;    /* blank time in ns */
8955         bool interlaced;    /* mode is interlaced */
8956         fixed20_12 vsc;    /* vertical scale ratio */
8957         u32 num_heads;     /* number of active crtcs */
8958         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8959         u32 lb_size;       /* line buffer allocated to pipe */
8960         u32 vtaps;         /* vertical scaler taps */
8961 };
8962
8963 /**
8964  * dce8_dram_bandwidth - get the dram bandwidth
8965  *
8966  * @wm: watermark calculation data
8967  *
8968  * Calculate the raw dram bandwidth (CIK).
8969  * Used for display watermark bandwidth calculations
8970  * Returns the dram bandwidth in MBytes/s
8971  */
8972 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8973 {
8974         /* Calculate raw DRAM Bandwidth */
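        /*
         * With the constants below this works out to:
         *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
         * e.g. with hypothetical values yclk = 1,000,000 kHz and 8 channels:
         *   1000 * 32 * 0.7 = 22,400 MBytes/s.
         */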
8975         fixed20_12 dram_efficiency; /* 0.7 */
8976         fixed20_12 yclk, dram_channels, bandwidth;
8977         fixed20_12 a;
8978
8979         a.full = dfixed_const(1000);
8980         yclk.full = dfixed_const(wm->yclk);
8981         yclk.full = dfixed_div(yclk, a);
8982         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8983         a.full = dfixed_const(10);
8984         dram_efficiency.full = dfixed_const(7);
8985         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8986         bandwidth.full = dfixed_mul(dram_channels, yclk);
8987         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8988
8989         return dfixed_trunc(bandwidth);
8990 }
8991
8992 /**
8993  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8994  *
8995  * @wm: watermark calculation data
8996  *
8997  * Calculate the dram bandwidth used for display (CIK).
8998  * Used for display watermark bandwidth calculations
8999  * Returns the dram bandwidth for display in MBytes/s
9000  */
9001 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9002 {
9003         /* Calculate DRAM Bandwidth and the part allocated to display. */
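        /*
         * Same computation as dce8_dram_bandwidth() above, but scaled by
         * the worst-case 0.3 display allocation instead of the 0.7 DRAM
         * efficiency factor.
         */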
9004         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9005         fixed20_12 yclk, dram_channels, bandwidth;
9006         fixed20_12 a;
9007
9008         a.full = dfixed_const(1000);
9009         yclk.full = dfixed_const(wm->yclk);
9010         yclk.full = dfixed_div(yclk, a);
9011         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9012         a.full = dfixed_const(10);
9013         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9014         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9015         bandwidth.full = dfixed_mul(dram_channels, yclk);
9016         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9017
9018         return dfixed_trunc(bandwidth);
9019 }
9020
9021 /**
9022  * dce8_data_return_bandwidth - get the data return bandwidth
9023  *
9024  * @wm: watermark calculation data
9025  *
9026  * Calculate the data return bandwidth used for display (CIK).
9027  * Used for display watermark bandwidth calculations
9028  * Returns the data return bandwidth in MBytes/s
9029  */
9030 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9031 {
9032         /* Calculate the display Data return Bandwidth */
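        /*
         * With the constants below this works out to:
         *   bandwidth = (sclk / 1000) * 32 * 0.8
         * i.e. presumably 32 bytes per engine clock at 80% efficiency.
         */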
9033         fixed20_12 return_efficiency; /* 0.8 */
9034         fixed20_12 sclk, bandwidth;
9035         fixed20_12 a;
9036
9037         a.full = dfixed_const(1000);
9038         sclk.full = dfixed_const(wm->sclk);
9039         sclk.full = dfixed_div(sclk, a);
9040         a.full = dfixed_const(10);
9041         return_efficiency.full = dfixed_const(8);
9042         return_efficiency.full = dfixed_div(return_efficiency, a);
9043         a.full = dfixed_const(32);
9044         bandwidth.full = dfixed_mul(a, sclk);
9045         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9046
9047         return dfixed_trunc(bandwidth);
9048 }
9049
9050 /**
9051  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9052  *
9053  * @wm: watermark calculation data
9054  *
9055  * Calculate the dmif bandwidth used for display (CIK).
9056  * Used for display watermark bandwidth calculations
9057  * Returns the dmif bandwidth in MBytes/s
9058  */
9059 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9060 {
9061         /* Calculate the DMIF Request Bandwidth */
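        /*
         * Mirrors dce8_data_return_bandwidth() but keyed off the display
         * clock:
         *   bandwidth = (disp_clk / 1000) * 32 * 0.8
         */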
9062         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9063         fixed20_12 disp_clk, bandwidth;
9064         fixed20_12 a, b;
9065
9066         a.full = dfixed_const(1000);
9067         disp_clk.full = dfixed_const(wm->disp_clk);
9068         disp_clk.full = dfixed_div(disp_clk, a);
9069         a.full = dfixed_const(32);
9070         b.full = dfixed_mul(a, disp_clk);
9071
9072         a.full = dfixed_const(10);
9073         disp_clk_request_efficiency.full = dfixed_const(8);
9074         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9075
9076         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9077
9078         return dfixed_trunc(bandwidth);
9079 }
9080
9081 /**
9082  * dce8_available_bandwidth - get the min available bandwidth
9083  *
9084  * @wm: watermark calculation data
9085  *
9086  * Calculate the min available bandwidth used for display (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns the min available bandwidth in MBytes/s
9089  */
9090 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9091 {
9092         /* Calculate the Available bandwidth. Display can use this temporarily but not sustain it on average. */
9093         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9094         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9095         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9096
9097         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9098 }
9099
9100 /**
9101  * dce8_average_bandwidth - get the average available bandwidth
9102  *
9103  * @wm: watermark calculation data
9104  *
9105  * Calculate the average available bandwidth used for display (CIK).
9106  * Used for display watermark bandwidth calculations
9107  * Returns the average available bandwidth in MBytes/s
9108  */
9109 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9110 {
9111         /* Calculate the display mode Average Bandwidth
9112          * DisplayMode should contain the source and destination dimensions,
9113          * timing, etc.
9114          */
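        /*
         * In terms of the watermark parameters this works out to:
         *   bandwidth = (src_width * bytes_per_pixel * vsc) /
         *               ((active_time + blank_time) / 1000)
         * i.e. the bytes fetched for one (scaled) source line divided by
         * the line time.
         */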
9115         fixed20_12 bpp;
9116         fixed20_12 line_time;
9117         fixed20_12 src_width;
9118         fixed20_12 bandwidth;
9119         fixed20_12 a;
9120
9121         a.full = dfixed_const(1000);
9122         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9123         line_time.full = dfixed_div(line_time, a);
9124         bpp.full = dfixed_const(wm->bytes_per_pixel);
9125         src_width.full = dfixed_const(wm->src_width);
9126         bandwidth.full = dfixed_mul(src_width, bpp);
9127         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9128         bandwidth.full = dfixed_div(bandwidth, line_time);
9129
9130         return dfixed_trunc(bandwidth);
9131 }
9132
9133 /**
9134  * dce8_latency_watermark - get the latency watermark
9135  *
9136  * @wm: watermark calculation data
9137  *
9138  * Calculate the latency watermark (CIK).
9139  * Used for display watermark bandwidth calculations
9140  * Returns the latency watermark in ns
9141  */
9142 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9143 {
9144         /* First calculate the latency in ns */
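        /*
         * The latency accumulated below is the fixed MC latency, plus the
         * worst-case chunk and cursor line pair return times scaled by the
         * number of heads (time other heads may hold the return path), plus
         * the display pipe latency.  If the line buffer cannot be refilled
         * within the active display time, the shortfall is added to the
         * returned watermark at the end.
         */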
9145         u32 mc_latency = 2000; /* 2000 ns. */
9146         u32 available_bandwidth = dce8_available_bandwidth(wm);
9147         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9148         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9149         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9150         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9151                 (wm->num_heads * cursor_line_pair_return_time);
9152         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9153         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9154         u32 tmp, dmif_size = 12288;
9155         fixed20_12 a, b, c;
9156
9157         if (wm->num_heads == 0)
9158                 return 0;
9159
9160         a.full = dfixed_const(2);
9161         b.full = dfixed_const(1);
9162         if ((wm->vsc.full > a.full) ||
9163             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9164             (wm->vtaps >= 5) ||
9165             ((wm->vsc.full >= a.full) && wm->interlaced))
9166                 max_src_lines_per_dst_line = 4;
9167         else
9168                 max_src_lines_per_dst_line = 2;
9169
9170         a.full = dfixed_const(available_bandwidth);
9171         b.full = dfixed_const(wm->num_heads);
9172         a.full = dfixed_div(a, b);
9173
9174         b.full = dfixed_const(mc_latency + 512);
9175         c.full = dfixed_const(wm->disp_clk);
9176         b.full = dfixed_div(b, c);
9177
9178         c.full = dfixed_const(dmif_size);
9179         b.full = dfixed_div(c, b);
9180
9181         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9182
9183         b.full = dfixed_const(1000);
9184         c.full = dfixed_const(wm->disp_clk);
9185         b.full = dfixed_div(c, b);
9186         c.full = dfixed_const(wm->bytes_per_pixel);
9187         b.full = dfixed_mul(b, c);
9188
9189         lb_fill_bw = min(tmp, dfixed_trunc(b));
9190
9191         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9192         b.full = dfixed_const(1000);
9193         c.full = dfixed_const(lb_fill_bw);
9194         b.full = dfixed_div(c, b);
9195         a.full = dfixed_div(a, b);
9196         line_fill_time = dfixed_trunc(a);
9197
9198         if (line_fill_time < wm->active_time)
9199                 return latency;
9200         else
9201                 return latency + (line_fill_time - wm->active_time);
9202
9203 }
9204
9205 /**
9206  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9207  * average bandwidth against the dram bandwidth available to display
9208  *
9209  * @wm: watermark calculation data
9210  *
9211  * Check if the display average bandwidth fits in the display
9212  * dram bandwidth (CIK).
9213  * Used for display watermark bandwidth calculations
9214  * Returns true if the display fits, false if not.
9215  */
9216 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9217 {
9218         if (dce8_average_bandwidth(wm) <=
9219             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9220                 return true;
9221         else
9222                 return false;
9223 }
9224
9225 /**
9226  * dce8_average_bandwidth_vs_available_bandwidth - check
9227  * average bandwidth against available bandwidth
9228  *
9229  * @wm: watermark calculation data
9230  *
9231  * Check if the display average bandwidth fits in the display
9232  * available bandwidth (CIK).
9233  * Used for display watermark bandwidth calculations
9234  * Returns true if the display fits, false if not.
9235  */
9236 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9237 {
9238         if (dce8_average_bandwidth(wm) <=
9239             (dce8_available_bandwidth(wm) / wm->num_heads))
9240                 return true;
9241         else
9242                 return false;
9243 }
9244
9245 /**
9246  * dce8_check_latency_hiding - check latency hiding
9247  *
9248  * @wm: watermark calculation data
9249  *
9250  * Check latency hiding (CIK).
9251  * Used for display watermark bandwidth calculations
9252  * Returns true if the display fits, false if not.
9253  */
9254 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9255 {
9256         u32 lb_partitions = wm->lb_size / wm->src_width;
9257         u32 line_time = wm->active_time + wm->blank_time;
9258         u32 latency_tolerant_lines;
9259         u32 latency_hiding;
9260         fixed20_12 a;
9261
9262         a.full = dfixed_const(1);
9263         if (wm->vsc.full > a.full)
9264                 latency_tolerant_lines = 1;
9265         else {
9266                 if (lb_partitions <= (wm->vtaps + 1))
9267                         latency_tolerant_lines = 1;
9268                 else
9269                         latency_tolerant_lines = 2;
9270         }
9271
9272         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9273
9274         if (dce8_latency_watermark(wm) <= latency_hiding)
9275                 return true;
9276         else
9277                 return false;
9278 }
9279
9280 /**
9281  * dce8_program_watermarks - program display watermarks
9282  *
9283  * @rdev: radeon_device pointer
9284  * @radeon_crtc: the selected display controller
9285  * @lb_size: line buffer size
9286  * @num_heads: number of display controllers in use
9287  *
9288  * Calculate and program the display watermarks for the
9289  * selected display controller (CIK).
9290  */
9291 static void dce8_program_watermarks(struct radeon_device *rdev,
9292                                     struct radeon_crtc *radeon_crtc,
9293                                     u32 lb_size, u32 num_heads)
9294 {
9295         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9296         struct dce8_wm_params wm_low, wm_high;
9297         u32 pixel_period;
9298         u32 line_time = 0;
9299         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9300         u32 tmp, wm_mask;
9301
9302         if (radeon_crtc->base.enabled && num_heads && mode) {
9303                 pixel_period = 1000000 / (u32)mode->clock;
9304                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9305
9306                 /* watermark for high clocks */
9307                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9308                     rdev->pm.dpm_enabled) {
9309                         wm_high.yclk =
9310                                 radeon_dpm_get_mclk(rdev, false) * 10;
9311                         wm_high.sclk =
9312                                 radeon_dpm_get_sclk(rdev, false) * 10;
9313                 } else {
9314                         wm_high.yclk = rdev->pm.current_mclk * 10;
9315                         wm_high.sclk = rdev->pm.current_sclk * 10;
9316                 }
9317
9318                 wm_high.disp_clk = mode->clock;
9319                 wm_high.src_width = mode->crtc_hdisplay;
9320                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9321                 wm_high.blank_time = line_time - wm_high.active_time;
9322                 wm_high.interlaced = false;
9323                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9324                         wm_high.interlaced = true;
9325                 wm_high.vsc = radeon_crtc->vsc;
9326                 wm_high.vtaps = 1;
9327                 if (radeon_crtc->rmx_type != RMX_OFF)
9328                         wm_high.vtaps = 2;
9329                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9330                 wm_high.lb_size = lb_size;
9331                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9332                 wm_high.num_heads = num_heads;
9333
9334                 /* set for high clocks */
9335                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9336
9337                 /* possibly force display priority to high */
9338                 /* should really do this at mode validation time... */
9339                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9340                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9341                     !dce8_check_latency_hiding(&wm_high) ||
9342                     (rdev->disp_priority == 2)) {
9343                         DRM_DEBUG_KMS("force priority to high\n");
9344                 }
9345
9346                 /* watermark for low clocks */
9347                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9348                     rdev->pm.dpm_enabled) {
9349                         wm_low.yclk =
9350                                 radeon_dpm_get_mclk(rdev, true) * 10;
9351                         wm_low.sclk =
9352                                 radeon_dpm_get_sclk(rdev, true) * 10;
9353                 } else {
9354                         wm_low.yclk = rdev->pm.current_mclk * 10;
9355                         wm_low.sclk = rdev->pm.current_sclk * 10;
9356                 }
9357
9358                 wm_low.disp_clk = mode->clock;
9359                 wm_low.src_width = mode->crtc_hdisplay;
9360                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9361                 wm_low.blank_time = line_time - wm_low.active_time;
9362                 wm_low.interlaced = false;
9363                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9364                         wm_low.interlaced = true;
9365                 wm_low.vsc = radeon_crtc->vsc;
9366                 wm_low.vtaps = 1;
9367                 if (radeon_crtc->rmx_type != RMX_OFF)
9368                         wm_low.vtaps = 2;
9369                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9370                 wm_low.lb_size = lb_size;
9371                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9372                 wm_low.num_heads = num_heads;
9373
9374                 /* set for low clocks */
9375                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9376
9377                 /* possibly force display priority to high */
9378                 /* should really do this at mode validation time... */
9379                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9380                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9381                     !dce8_check_latency_hiding(&wm_low) ||
9382                     (rdev->disp_priority == 2)) {
9383                         DRM_DEBUG_KMS("force priority to high\n");
9384                 }
9385         }
9386
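        /*
         * Program both watermark sets: wm A gets the high-clock latency
         * value and wm B the low-clock one, then the originally selected
         * set is restored.  The per-crtc values saved at the end are
         * presumably what the DPM code uses when switching power levels.
         */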
9387         /* select wm A */
9388         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9389         tmp = wm_mask;
9390         tmp &= ~LATENCY_WATERMARK_MASK(3);
9391         tmp |= LATENCY_WATERMARK_MASK(1);
9392         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9393         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9394                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9395                 LATENCY_HIGH_WATERMARK(line_time)));
9396         /* select wm B */
9397         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9398         tmp &= ~LATENCY_WATERMARK_MASK(3);
9399         tmp |= LATENCY_WATERMARK_MASK(2);
9400         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9401         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9402                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9403                 LATENCY_HIGH_WATERMARK(line_time)));
9404         /* restore original selection */
9405         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9406
9407         /* save values for DPM */
9408         radeon_crtc->line_time = line_time;
9409         radeon_crtc->wm_high = latency_watermark_a;
9410         radeon_crtc->wm_low = latency_watermark_b;
9411 }
9412
9413 /**
9414  * dce8_bandwidth_update - program display watermarks
9415  *
9416  * @rdev: radeon_device pointer
9417  *
9418  * Calculate and program the display watermarks and line
9419  * buffer allocation (CIK).
9420  */
9421 void dce8_bandwidth_update(struct radeon_device *rdev)
9422 {
9423         struct drm_display_mode *mode = NULL;
9424         u32 num_heads = 0, lb_size;
9425         int i;
9426
9427         radeon_update_display_priority(rdev);
9428
9429         for (i = 0; i < rdev->num_crtc; i++) {
9430                 if (rdev->mode_info.crtcs[i]->base.enabled)
9431                         num_heads++;
9432         }
9433         for (i = 0; i < rdev->num_crtc; i++) {
9434                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9435                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9436                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9437         }
9438 }
9439
9440 /**
9441  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9442  *
9443  * @rdev: radeon_device pointer
9444  *
9445  * Fetches a GPU clock counter snapshot (CIK).
9446  * Returns the 64 bit clock counter snapshot.
9447  */
9448 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9449 {
9450         uint64_t clock;
9451
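        /*
         * gpu_clock_mutex keeps the capture write and the two 32-bit reads
         * atomic with respect to other callers, so the result cannot mix an
         * LSB from one snapshot with an MSB from another.
         */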
9452         mutex_lock(&rdev->gpu_clock_mutex);
9453         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9454         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9455                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9456         mutex_unlock(&rdev->gpu_clock_mutex);
9457         return clock;
9458 }
9459
9460 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9461                               u32 cntl_reg, u32 status_reg)
9462 {
9463         int r, i;
9464         struct atom_clock_dividers dividers;
9465         uint32_t tmp;
9466
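        /*
         * Ask the VBIOS (via atombios) for dividers matching the requested
         * clock, program the post divider into the SMC control register,
         * then poll the status register for up to ~1s (100 x 10ms) for the
         * status bit to be set.
         */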
9467         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9468                                            clock, false, &dividers);
9469         if (r)
9470                 return r;
9471
9472         tmp = RREG32_SMC(cntl_reg);
9473         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9474         tmp |= dividers.post_divider;
9475         WREG32_SMC(cntl_reg, tmp);
9476
9477         for (i = 0; i < 100; i++) {
9478                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9479                         break;
9480                 mdelay(10);
9481         }
9482         if (i == 100)
9483                 return -ETIMEDOUT;
9484
9485         return 0;
9486 }
9487
9488 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9489 {
9490         int r = 0;
9491
9492         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9493         if (r)
9494                 return r;
9495
9496         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9497         return r;
9498 }
9499
9500 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9501 {
9502         int r, i;
9503         struct atom_clock_dividers dividers;
9504         u32 tmp;
9505
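        /*
         * Same pattern as cik_set_uvd_clock() but for the VCE engine clock:
         * ECLK_STATUS is polled both before and after programming the new
         * post divider, with a ~1s (100 x 10ms) timeout each time.
         */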
9506         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9507                                            ecclk, false, &dividers);
9508         if (r)
9509                 return r;
9510
9511         for (i = 0; i < 100; i++) {
9512                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9513                         break;
9514                 mdelay(10);
9515         }
9516         if (i == 100)
9517                 return -ETIMEDOUT;
9518
9519         tmp = RREG32_SMC(CG_ECLK_CNTL);
9520         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9521         tmp |= dividers.post_divider;
9522         WREG32_SMC(CG_ECLK_CNTL, tmp);
9523
9524         for (i = 0; i < 100; i++) {
9525                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9526                         break;
9527                 mdelay(10);
9528         }
9529         if (i == 100)
9530                 return -ETIMEDOUT;
9531
9532         return 0;
9533 }
9534
9535 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9536 {
9537         struct pci_dev *root = rdev->pdev->bus->self;
9538         int bridge_pos, gpu_pos;
9539         u32 speed_cntl, mask, current_data_rate;
9540         int ret, i;
9541         u16 tmp16;
9542
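        /*
         * Overall flow: bail out on root-bus, IGP or non-PCIE parts, or if
         * disabled via radeon.pcie_gen2=0; read the platform's supported
         * link speeds; if gen3 is the target and not already active,
         * retrain the link using the redo-equalization handshake (up to 10
         * attempts); finally force a software speed change, write the
         * target speed into LNKCTL2 and wait for the change to complete.
         */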
9543         if (pci_is_root_bus(rdev->pdev->bus))
9544                 return;
9545
9546         if (radeon_pcie_gen2 == 0)
9547                 return;
9548
9549         if (rdev->flags & RADEON_IS_IGP)
9550                 return;
9551
9552         if (!(rdev->flags & RADEON_IS_PCIE))
9553                 return;
9554
9555         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9556         if (ret != 0)
9557                 return;
9558
9559         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9560                 return;
9561
9562         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9563         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9564                 LC_CURRENT_DATA_RATE_SHIFT;
9565         if (mask & DRM_PCIE_SPEED_80) {
9566                 if (current_data_rate == 2) {
9567                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9568                         return;
9569                 }
9570                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9571         } else if (mask & DRM_PCIE_SPEED_50) {
9572                 if (current_data_rate == 1) {
9573                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9574                         return;
9575                 }
9576                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9577         }
9578
9579         bridge_pos = pci_pcie_cap(root);
9580         if (!bridge_pos)
9581                 return;
9582
9583         gpu_pos = pci_pcie_cap(rdev->pdev);
9584         if (!gpu_pos)
9585                 return;
9586
9587         if (mask & DRM_PCIE_SPEED_80) {
9588                 /* re-try equalization if gen3 is not already enabled */
9589                 if (current_data_rate != 2) {
9590                         u16 bridge_cfg, gpu_cfg;
9591                         u16 bridge_cfg2, gpu_cfg2;
9592                         u32 max_lw, current_lw, tmp;
9593
9594                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9595                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9596
9597                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9598                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9599
9600                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9601                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9602
9603                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9604                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9605                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9606
9607                         if (current_lw < max_lw) {
9608                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9609                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9610                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9611                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9612                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9613                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9614                                 }
9615                         }
9616
9617                         for (i = 0; i < 10; i++) {
9618                                 /* check status */
9619                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9620                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9621                                         break;
9622
9623                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9624                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9625
9626                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9627                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9628
9629                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9630                                 tmp |= LC_SET_QUIESCE;
9631                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9632
9633                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9634                                 tmp |= LC_REDO_EQ;
9635                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9636
9637                                 mdelay(100);
9638
9639                                 /* linkctl */
9640                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9641                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9642                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9643                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9644
9645                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9646                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9647                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9648                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9649
9650                                 /* linkctl2 */
9651                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9652                                 tmp16 &= ~((1 << 4) | (7 << 9));
9653                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9654                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9655
9656                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9657                                 tmp16 &= ~((1 << 4) | (7 << 9));
9658                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9659                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9660
9661                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9662                                 tmp &= ~LC_SET_QUIESCE;
9663                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9664                         }
9665                 }
9666         }
9667
9668         /* set the link speed */
9669         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9670         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9671         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9672
9673         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9674         tmp16 &= ~0xf;
9675         if (mask & DRM_PCIE_SPEED_80)
9676                 tmp16 |= 3; /* gen3 */
9677         else if (mask & DRM_PCIE_SPEED_50)
9678                 tmp16 |= 2; /* gen2 */
9679         else
9680                 tmp16 |= 1; /* gen1 */
9681         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9682
9683         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9684         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9685         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9686
9687         for (i = 0; i < rdev->usec_timeout; i++) {
9688                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9689                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9690                         break;
9691                 udelay(1);
9692         }
9693 }
9694
9695 static void cik_program_aspm(struct radeon_device *rdev)
9696 {
9697         u32 data, orig;
9698         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9699         bool disable_clkreq = false;
9700
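        /*
         * Skipped for IGPs, non-PCIE parts, or when disabled via
         * radeon.aspm=0.  Otherwise this programs the N_FTS count, enables
         * the L0s/L1 inactivity timers and, when the upstream bridge
         * advertises clock power management, additionally allows PLL
         * power-down in L1 and retargets several clock selects (THM, MISC,
         * CG_CLKPIN, MPLL bypass), presumably so the reference clock can be
         * gated via CLKREQ#.
         */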
9701         if (radeon_aspm == 0)
9702                 return;
9703
9704         /* XXX double check IGPs */
9705         if (rdev->flags & RADEON_IS_IGP)
9706                 return;
9707
9708         if (!(rdev->flags & RADEON_IS_PCIE))
9709                 return;
9710
9711         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9712         data &= ~LC_XMIT_N_FTS_MASK;
9713         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9714         if (orig != data)
9715                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9716
9717         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9718         data |= LC_GO_TO_RECOVERY;
9719         if (orig != data)
9720                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9721
9722         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9723         data |= P_IGNORE_EDB_ERR;
9724         if (orig != data)
9725                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9726
9727         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9728         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9729         data |= LC_PMI_TO_L1_DIS;
9730         if (!disable_l0s)
9731                 data |= LC_L0S_INACTIVITY(7);
9732
9733         if (!disable_l1) {
9734                 data |= LC_L1_INACTIVITY(7);
9735                 data &= ~LC_PMI_TO_L1_DIS;
9736                 if (orig != data)
9737                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9738
9739                 if (!disable_plloff_in_l1) {
9740                         bool clk_req_support;
9741
9742                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9743                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9744                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9745                         if (orig != data)
9746                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9747
9748                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9749                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9750                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9751                         if (orig != data)
9752                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9753
9754                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9755                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9756                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9757                         if (orig != data)
9758                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9759
9760                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9761                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9762                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9763                         if (orig != data)
9764                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9765
9766                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9767                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9768                         data |= LC_DYN_LANES_PWR_STATE(3);
9769                         if (orig != data)
9770                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9771
9772                         if (!disable_clkreq &&
9773                             !pci_is_root_bus(rdev->pdev->bus)) {
9774                                 struct pci_dev *root = rdev->pdev->bus->self;
9775                                 u32 lnkcap;
9776
9777                                 clk_req_support = false;
9778                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9779                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9780                                         clk_req_support = true;
9781                         } else {
9782                                 clk_req_support = false;
9783                         }
9784
9785                         if (clk_req_support) {
9786                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9787                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9788                                 if (orig != data)
9789                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9790
9791                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9792                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9793                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9794                                 if (orig != data)
9795                                         WREG32_SMC(THM_CLK_CNTL, data);
9796
9797                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9798                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9799                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9800                                 if (orig != data)
9801                                         WREG32_SMC(MISC_CLK_CTRL, data);
9802
9803                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9804                                 data &= ~BCLK_AS_XCLK;
9805                                 if (orig != data)
9806                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9807
9808                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9809                                 data &= ~FORCE_BIF_REFCLK_EN;
9810                                 if (orig != data)
9811                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9812
9813                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9814                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9815                                 data |= MPLL_CLKOUT_SEL(4);
9816                                 if (orig != data)
9817                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9818                         }
9819                 }
9820         } else {
9821                 if (orig != data)
9822                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9823         }
9824
9825         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9826         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9827         if (orig != data)
9828                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9829
9830         if (!disable_l0s) {
9831                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9832                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9833                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9834                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9835                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9836                                 data &= ~LC_L0S_INACTIVITY_MASK;
9837                                 if (orig != data)
9838                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9839                         }
9840                 }
9841         }
9842 }