]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/radeon/cik.c
drm/radeon: set PIPE_CONFIG for 1D and linear tiling modes on CIK
[karo-tx-linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
79 static void cik_rlc_stop(struct radeon_device *rdev);
80 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
81 static void cik_program_aspm(struct radeon_device *rdev);
82 static void cik_init_pg(struct radeon_device *rdev);
83 static void cik_init_cg(struct radeon_device *rdev);
84 static void cik_fini_pg(struct radeon_device *rdev);
85 static void cik_fini_cg(struct radeon_device *rdev);
86 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
87                                           bool enable);
88
89 /* get temperature in millidegrees */
90 int ci_get_temp(struct radeon_device *rdev)
91 {
92         u32 temp;
93         int actual_temp = 0;
94
95         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
96                 CTF_TEMP_SHIFT;
97
98         if (temp & 0x200)
99                 actual_temp = 255;
100         else
101                 actual_temp = temp & 0x1ff;
102
103         actual_temp = actual_temp * 1000;
104
105         return actual_temp;
106 }
107
108 /* get temperature in millidegrees */
109 int kv_get_temp(struct radeon_device *rdev)
110 {
111         u32 temp;
112         int actual_temp = 0;
113
114         temp = RREG32_SMC(0xC0300E0C);
115
116         if (temp)
117                 actual_temp = (temp / 8) - 49;
118         else
119                 actual_temp = 0;
120
121         actual_temp = actual_temp * 1000;
122
123         return actual_temp;
124 }
125
/*
 * Indirect registers accessor
 */
/**
 * cik_pciep_rreg - read an indirect PCIE port register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset to read
 *
 * Selects @reg through the PCIE_INDEX/PCIE_DATA index-data pair and
 * returns the value read from PCIE_DATA.  The whole sequence is
 * serialized with the IRQ-safe pciep_idx_lock so a concurrent accessor
 * cannot change PCIE_INDEX between the select and the data read.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
141
/**
 * cik_pciep_wreg - write an indirect PCIE port register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset to write
 * @v: value to write
 *
 * Selects @reg through the PCIE_INDEX/PCIE_DATA index-data pair and
 * writes @v to PCIE_DATA.  Each write is followed by a read-back so it
 * is posted before the lock is dropped.  Serialized with the IRQ-safe
 * pciep_idx_lock, matching cik_pciep_rreg().
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
153
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * The table is consumed as pairs:
 *	((select value) << 16) | (register byte offset >> 2),
 *	0x00000000	placeholder for the saved register value
 * NOTE(review): the upper-half select values (0x0e00 broadcast vs the
 * 0x4e00..0xbe00 per-instance variants) and the bare marker words
 * (0x3, 0x5 below) are interpreted by the RLC microcode; their exact
 * semantics are not visible in this file -- confirm against the RLC
 * ucode documentation before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* marker word -- see NOTE(review) above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* marker word; the following entries carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
600
/*
 * RLC save/restore register list for Kalindi (Kabini) parts.
 *
 * Same pair layout as spectre_rlc_save_restore_register_list:
 *	((select value) << 16) | (register byte offset >> 2),
 *	0x00000000	placeholder for the saved register value
 * Kalindi carries fewer per-instance (0x4e00..0x7e00) entries than the
 * Spectre list.  NOTE(review): the select encodings and the bare
 * marker words (0x3, 0x5) are interpreted by the RLC microcode; their
 * exact semantics are not visible in this file -- confirm against the
 * RLC ucode documentation before editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* marker word -- see NOTE(review) above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* marker word; the following entries carry no value slot */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
925
/* Bonaire SPM "golden" settings: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence(). */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
930
/* Bonaire common "golden" settings: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence(). */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
938
/* Bonaire "golden" register fixups: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence().  Values are opaque
 * hardware data and must not be reformatted or reordered. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
983
/* Bonaire medium/coarse grain clock gating (MGCG/CGCG) init sequence:
 * {offset, mask, value} triplets applied at init via
 * radeon_program_register_sequence().  Opaque hardware data. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1069
/* Spectre (Kaveri) SPM "golden" settings: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1074
/* Spectre (Kaveri) common "golden" settings: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence(). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1082
/* Spectre (Kaveri) "golden" register fixups: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence().  Opaque
 * hardware data. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1111
/* Spectre (Kaveri) MGCG/CGCG clock gating init sequence:
 * {offset, mask, value} triplets applied at init via
 * radeon_program_register_sequence().  Opaque hardware data. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1202
/* Kalindi (Kabini) SPM "golden" settings: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence(). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1207
/* Kalindi (Kabini) common "golden" settings: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence(). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1215
/* Kalindi (Kabini) "golden" register fixups: {offset, mask, value} triplets
 * applied at init via radeon_program_register_sequence().  Opaque
 * hardware data. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1249
/* Kalindi (Kabini) MGCG/CGCG clock gating init sequence:
 * {offset, mask, value} triplets applied at init via
 * radeon_program_register_sequence().  Opaque hardware data. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1308
/* Hawaii SPM "golden" settings: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence(). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1313
/* Hawaii common "golden" settings: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence(). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1322
/* Hawaii "golden" register fixups: {offset, mask, value} triplets applied
 * at init via radeon_program_register_sequence().  Opaque hardware data. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1362
/* Hawaii MGCG/CGCG clock gating init sequence: {offset, mask, value}
 * triplets applied at init via radeon_program_register_sequence().
 * Opaque hardware data. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1473
1474 static void cik_init_golden_registers(struct radeon_device *rdev)
1475 {
1476         switch (rdev->family) {
1477         case CHIP_BONAIRE:
1478                 radeon_program_register_sequence(rdev,
1479                                                  bonaire_mgcg_cgcg_init,
1480                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1481                 radeon_program_register_sequence(rdev,
1482                                                  bonaire_golden_registers,
1483                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1484                 radeon_program_register_sequence(rdev,
1485                                                  bonaire_golden_common_registers,
1486                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1487                 radeon_program_register_sequence(rdev,
1488                                                  bonaire_golden_spm_registers,
1489                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1490                 break;
1491         case CHIP_KABINI:
1492                 radeon_program_register_sequence(rdev,
1493                                                  kalindi_mgcg_cgcg_init,
1494                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1495                 radeon_program_register_sequence(rdev,
1496                                                  kalindi_golden_registers,
1497                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1498                 radeon_program_register_sequence(rdev,
1499                                                  kalindi_golden_common_registers,
1500                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1501                 radeon_program_register_sequence(rdev,
1502                                                  kalindi_golden_spm_registers,
1503                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1504                 break;
1505         case CHIP_KAVERI:
1506                 radeon_program_register_sequence(rdev,
1507                                                  spectre_mgcg_cgcg_init,
1508                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1509                 radeon_program_register_sequence(rdev,
1510                                                  spectre_golden_registers,
1511                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1512                 radeon_program_register_sequence(rdev,
1513                                                  spectre_golden_common_registers,
1514                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1515                 radeon_program_register_sequence(rdev,
1516                                                  spectre_golden_spm_registers,
1517                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1518                 break;
1519         case CHIP_HAWAII:
1520                 radeon_program_register_sequence(rdev,
1521                                                  hawaii_mgcg_cgcg_init,
1522                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1523                 radeon_program_register_sequence(rdev,
1524                                                  hawaii_golden_registers,
1525                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1526                 radeon_program_register_sequence(rdev,
1527                                                  hawaii_golden_common_registers,
1528                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1529                 radeon_program_register_sequence(rdev,
1530                                                  hawaii_golden_spm_registers,
1531                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1532                 break;
1533         default:
1534                 break;
1535         }
1536 }
1537
1538 /**
1539  * cik_get_xclk - get the xclk
1540  *
1541  * @rdev: radeon_device pointer
1542  *
1543  * Returns the reference clock used by the gfx engine
1544  * (CIK).
1545  */
1546 u32 cik_get_xclk(struct radeon_device *rdev)
1547 {
1548         u32 reference_clock = rdev->clock.spll.reference_freq;
1549
1550         if (rdev->flags & RADEON_IS_IGP) {
1551                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1552                         return reference_clock / 2;
1553         } else {
1554                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1555                         return reference_clock / 4;
1556         }
1557         return reference_clock;
1558 }
1559
1560 /**
1561  * cik_mm_rdoorbell - read a doorbell dword
1562  *
1563  * @rdev: radeon_device pointer
1564  * @index: doorbell index
1565  *
1566  * Returns the value in the doorbell aperture at the
1567  * requested doorbell index (CIK).
1568  */
1569 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1570 {
1571         if (index < rdev->doorbell.num_doorbells) {
1572                 return readl(rdev->doorbell.ptr + index);
1573         } else {
1574                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1575                 return 0;
1576         }
1577 }
1578
1579 /**
1580  * cik_mm_wdoorbell - write a doorbell dword
1581  *
1582  * @rdev: radeon_device pointer
1583  * @index: doorbell index
1584  * @v: value to write
1585  *
1586  * Writes @v to the doorbell aperture at the
1587  * requested doorbell index (CIK).
1588  */
1589 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1590 {
1591         if (index < rdev->doorbell.num_doorbells) {
1592                 writel(v, rdev->doorbell.ptr + index);
1593         } else {
1594                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1595         }
1596 }
1597
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO settings: {register index, value} pairs programmed by
 * ci_mc_load_microcode() before the MC ucode is loaded.  Opaque
 * hardware data. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1639
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO settings: {register index, value} pairs programmed by
 * ci_mc_load_microcode() before the MC ucode is loaded.  Opaque
 * hardware data. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1667
1668
1669 /**
1670  * cik_srbm_select - select specific register instances
1671  *
1672  * @rdev: radeon_device pointer
1673  * @me: selected ME (micro engine)
1674  * @pipe: pipe
1675  * @queue: queue
1676  * @vmid: VMID
1677  *
1678  * Switches the currently active registers instances.  Some
1679  * registers are instanced per VMID, others are instanced per
1680  * me/pipe/queue combination.
1681  */
1682 static void cik_srbm_select(struct radeon_device *rdev,
1683                             u32 me, u32 pipe, u32 queue, u32 vmid)
1684 {
1685         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1686                              MEID(me & 0x3) |
1687                              VMID(vmid & 0xf) |
1688                              QUEUEID(queue & 0x7));
1689         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1690 }
1691
1692 /* ucode loading */
1693 /**
1694  * ci_mc_load_microcode - load MC ucode into the hw
1695  *
1696  * @rdev: radeon_device pointer
1697  *
1698  * Load the GDDR MC ucode into the hw (CIK).
1699  * Returns 0 on success, error on failure.
1700  */
1701 int ci_mc_load_microcode(struct radeon_device *rdev)
1702 {
1703         const __be32 *fw_data;
1704         u32 running, blackout = 0;
1705         u32 *io_mc_regs;
1706         int i, ucode_size, regs_size;
1707
1708         if (!rdev->mc_fw)
1709                 return -EINVAL;
1710
1711         switch (rdev->family) {
1712         case CHIP_BONAIRE:
1713                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1714                 ucode_size = CIK_MC_UCODE_SIZE;
1715                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1716                 break;
1717         case CHIP_HAWAII:
1718                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1719                 ucode_size = HAWAII_MC_UCODE_SIZE;
1720                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1721                 break;
1722         default:
1723                 return -EINVAL;
1724         }
1725
1726         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1727
1728         if (running == 0) {
1729                 if (running) {
1730                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1731                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1732                 }
1733
1734                 /* reset the engine and set to writable */
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1736                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1737
1738                 /* load mc io regs */
1739                 for (i = 0; i < regs_size; i++) {
1740                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1741                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1742                 }
1743                 /* load the MC ucode */
1744                 fw_data = (const __be32 *)rdev->mc_fw->data;
1745                 for (i = 0; i < ucode_size; i++)
1746                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1747
1748                 /* put the engine back into the active state */
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1751                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1752
1753                 /* wait for training to complete */
1754                 for (i = 0; i < rdev->usec_timeout; i++) {
1755                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1756                                 break;
1757                         udelay(1);
1758                 }
1759                 for (i = 0; i < rdev->usec_timeout; i++) {
1760                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1761                                 break;
1762                         udelay(1);
1763                 }
1764
1765                 if (running)
1766                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1767         }
1768
1769         return 0;
1770 }
1771
1772 /**
1773  * cik_init_microcode - load ucode images from disk
1774  *
1775  * @rdev: radeon_device pointer
1776  *
1777  * Use the firmware interface to load the ucode images into
1778  * the driver (not loaded into hw).
1779  * Returns 0 on success, error on failure.
1780  */
1781 static int cik_init_microcode(struct radeon_device *rdev)
1782 {
1783         const char *chip_name;
1784         size_t pfp_req_size, me_req_size, ce_req_size,
1785                 mec_req_size, rlc_req_size, mc_req_size = 0,
1786                 sdma_req_size, smc_req_size = 0;
1787         char fw_name[30];
1788         int err;
1789
1790         DRM_DEBUG("\n");
1791
1792         switch (rdev->family) {
1793         case CHIP_BONAIRE:
1794                 chip_name = "BONAIRE";
1795                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1796                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1797                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1798                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1799                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1800                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1801                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1802                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1803                 break;
1804         case CHIP_HAWAII:
1805                 chip_name = "HAWAII";
1806                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1807                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1808                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1809                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1810                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1811                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1812                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1813                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1814                 break;
1815         case CHIP_KAVERI:
1816                 chip_name = "KAVERI";
1817                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1818                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1819                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1820                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1821                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1822                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1823                 break;
1824         case CHIP_KABINI:
1825                 chip_name = "KABINI";
1826                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1827                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1828                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1829                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1830                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1831                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1832                 break;
1833         default: BUG();
1834         }
1835
1836         DRM_INFO("Loading %s Microcode\n", chip_name);
1837
1838         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1839         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1840         if (err)
1841                 goto out;
1842         if (rdev->pfp_fw->size != pfp_req_size) {
1843                 printk(KERN_ERR
1844                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1845                        rdev->pfp_fw->size, fw_name);
1846                 err = -EINVAL;
1847                 goto out;
1848         }
1849
1850         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1851         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1852         if (err)
1853                 goto out;
1854         if (rdev->me_fw->size != me_req_size) {
1855                 printk(KERN_ERR
1856                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1857                        rdev->me_fw->size, fw_name);
1858                 err = -EINVAL;
1859         }
1860
1861         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1862         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1863         if (err)
1864                 goto out;
1865         if (rdev->ce_fw->size != ce_req_size) {
1866                 printk(KERN_ERR
1867                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1868                        rdev->ce_fw->size, fw_name);
1869                 err = -EINVAL;
1870         }
1871
1872         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1873         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1874         if (err)
1875                 goto out;
1876         if (rdev->mec_fw->size != mec_req_size) {
1877                 printk(KERN_ERR
1878                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1879                        rdev->mec_fw->size, fw_name);
1880                 err = -EINVAL;
1881         }
1882
1883         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1884         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1885         if (err)
1886                 goto out;
1887         if (rdev->rlc_fw->size != rlc_req_size) {
1888                 printk(KERN_ERR
1889                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1890                        rdev->rlc_fw->size, fw_name);
1891                 err = -EINVAL;
1892         }
1893
1894         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1895         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1896         if (err)
1897                 goto out;
1898         if (rdev->sdma_fw->size != sdma_req_size) {
1899                 printk(KERN_ERR
1900                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1901                        rdev->sdma_fw->size, fw_name);
1902                 err = -EINVAL;
1903         }
1904
1905         /* No SMC, MC ucode on APUs */
1906         if (!(rdev->flags & RADEON_IS_IGP)) {
1907                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1908                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1909                 if (err)
1910                         goto out;
1911                 if (rdev->mc_fw->size != mc_req_size) {
1912                         printk(KERN_ERR
1913                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1914                                rdev->mc_fw->size, fw_name);
1915                         err = -EINVAL;
1916                 }
1917
1918                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920                 if (err) {
1921                         printk(KERN_ERR
1922                                "smc: error loading firmware \"%s\"\n",
1923                                fw_name);
1924                         release_firmware(rdev->smc_fw);
1925                         rdev->smc_fw = NULL;
1926                         err = 0;
1927                 } else if (rdev->smc_fw->size != smc_req_size) {
1928                         printk(KERN_ERR
1929                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1930                                rdev->smc_fw->size, fw_name);
1931                         err = -EINVAL;
1932                 }
1933         }
1934
1935 out:
1936         if (err) {
1937                 if (err != -EINVAL)
1938                         printk(KERN_ERR
1939                                "cik_cp: Failed to load firmware \"%s\"\n",
1940                                fw_name);
1941                 release_firmware(rdev->pfp_fw);
1942                 rdev->pfp_fw = NULL;
1943                 release_firmware(rdev->me_fw);
1944                 rdev->me_fw = NULL;
1945                 release_firmware(rdev->ce_fw);
1946                 rdev->ce_fw = NULL;
1947                 release_firmware(rdev->rlc_fw);
1948                 rdev->rlc_fw = NULL;
1949                 release_firmware(rdev->mc_fw);
1950                 rdev->mc_fw = NULL;
1951                 release_firmware(rdev->smc_fw);
1952                 rdev->smc_fw = NULL;
1953         }
1954         return err;
1955 }
1956
1957 /*
1958  * Core functions
1959  */
1960 /**
1961  * cik_tiling_mode_table_init - init the hw tiling table
1962  *
1963  * @rdev: radeon_device pointer
1964  *
1965  * Starting with SI, the tiling setup is done globally in a
1966  * set of 32 tiling modes.  Rather than selecting each set of
1967  * parameters per surface as on older asics, we just select
1968  * which index in the tiling table we want to use, and the
1969  * surface uses those parameters (CIK).
1970  */
1971 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1972 {
1973         const u32 num_tile_mode_states = 32;
1974         const u32 num_secondary_tile_mode_states = 16;
1975         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1976         u32 num_pipe_configs;
1977         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1978                 rdev->config.cik.max_shader_engines;
1979
1980         switch (rdev->config.cik.mem_row_size_in_kb) {
1981         case 1:
1982                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1983                 break;
1984         case 2:
1985         default:
1986                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1987                 break;
1988         case 4:
1989                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1990                 break;
1991         }
1992
1993         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1994         if (num_pipe_configs > 8)
1995                 num_pipe_configs = 16;
1996
1997         if (num_pipe_configs == 16) {
1998                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1999                         switch (reg_offset) {
2000                         case 0:
2001                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2002                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2003                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2004                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2005                                 break;
2006                         case 1:
2007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2009                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2010                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2011                                 break;
2012                         case 2:
2013                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2016                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017                                 break;
2018                         case 3:
2019                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2020                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2022                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2023                                 break;
2024                         case 4:
2025                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2028                                                  TILE_SPLIT(split_equal_to_row_size));
2029                                 break;
2030                         case 5:
2031                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2032                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2033                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034                                 break;
2035                         case 6:
2036                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2037                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2038                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2039                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2040                                 break;
2041                         case 7:
2042                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2043                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2044                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2045                                                  TILE_SPLIT(split_equal_to_row_size));
2046                                 break;
2047                         case 8:
2048                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2049                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2050                                 break;
2051                         case 9:
2052                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2054                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2055                                 break;
2056                         case 10:
2057                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2058                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2060                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061                                 break;
2062                         case 11:
2063                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2065                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2066                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067                                 break;
2068                         case 12:
2069                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2070                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2072                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073                                 break;
2074                         case 13:
2075                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2078                                 break;
2079                         case 14:
2080                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2081                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2083                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2084                                 break;
2085                         case 16:
2086                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2087                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2089                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                                 break;
2091                         case 17:
2092                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2093                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2095                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                                 break;
2097                         case 27:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2099                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2100                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2101                                 break;
2102                         case 28:
2103                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2105                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2106                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107                                 break;
2108                         case 29:
2109                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2110                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2111                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2112                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2113                                 break;
2114                         case 30:
2115                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2116                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2117                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2118                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119                                 break;
2120                         default:
2121                                 gb_tile_moden = 0;
2122                                 break;
2123                         }
2124                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2125                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2126                 }
2127                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2128                         switch (reg_offset) {
2129                         case 0:
2130                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2131                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2132                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2133                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2134                                 break;
2135                         case 1:
2136                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2138                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2140                                 break;
2141                         case 2:
2142                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2143                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2144                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2145                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2146                                 break;
2147                         case 3:
2148                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2150                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2151                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2152                                 break;
2153                         case 4:
2154                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2155                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2156                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2157                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2158                                 break;
2159                         case 5:
2160                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2163                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2164                                 break;
2165                         case 6:
2166                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2168                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2169                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2170                                 break;
2171                         case 8:
2172                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2173                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2175                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2176                                 break;
2177                         case 9:
2178                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2182                                 break;
2183                         case 10:
2184                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2187                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2188                                 break;
2189                         case 11:
2190                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2192                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2193                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2194                                 break;
2195                         case 12:
2196                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2198                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2199                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2200                                 break;
2201                         case 13:
2202                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2205                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2206                                 break;
2207                         case 14:
2208                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2209                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2210                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2211                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2212                                 break;
2213                         default:
2214                                 gb_tile_moden = 0;
2215                                 break;
2216                         }
2217                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2218                 }
2219         } else if (num_pipe_configs == 8) {
2220                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2221                         switch (reg_offset) {
2222                         case 0:
2223                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2225                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2226                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2227                                 break;
2228                         case 1:
2229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2231                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2232                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2233                                 break;
2234                         case 2:
2235                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2237                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2238                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2239                                 break;
2240                         case 3:
2241                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2243                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2244                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2245                                 break;
2246                         case 4:
2247                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2249                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2250                                                  TILE_SPLIT(split_equal_to_row_size));
2251                                 break;
2252                         case 5:
2253                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                                 break;
2257                         case 6:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2262                                 break;
2263                         case 7:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2265                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2266                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2267                                                  TILE_SPLIT(split_equal_to_row_size));
2268                                 break;
2269                         case 8:
2270                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2272                                 break;
2273                         case 9:
2274                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2276                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2277                                 break;
2278                         case 10:
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2282                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283                                 break;
2284                         case 11:
2285                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2286                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2287                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2288                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2289                                 break;
2290                         case 12:
2291                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2292                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2294                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295                                 break;
2296                         case 13:
2297                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2299                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2300                                 break;
2301                         case 14:
2302                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2305                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306                                 break;
2307                         case 16:
2308                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2309                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2310                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2311                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312                                 break;
2313                         case 17:
2314                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2315                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2317                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                                 break;
2319                         case 27:
2320                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2322                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2323                                 break;
2324                         case 28:
2325                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2327                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2328                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329                                 break;
2330                         case 29:
2331                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2334                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2335                                 break;
2336                         case 30:
2337                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2340                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2341                                 break;
2342                         default:
2343                                 gb_tile_moden = 0;
2344                                 break;
2345                         }
2346                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2347                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2348                 }
2349                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2350                         switch (reg_offset) {
2351                         case 0:
2352                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2355                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2356                                 break;
2357                         case 1:
2358                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2361                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2362                                 break;
2363                         case 2:
2364                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2368                                 break;
2369                         case 3:
2370                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2372                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2374                                 break;
2375                         case 4:
2376                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2380                                 break;
2381                         case 5:
2382                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2385                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2386                                 break;
2387                         case 6:
2388                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2392                                 break;
2393                         case 8:
2394                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2397                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2398                                 break;
2399                         case 9:
2400                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2402                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2404                                 break;
2405                         case 10:
2406                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2408                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2409                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2410                                 break;
2411                         case 11:
2412                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2415                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2416                                 break;
2417                         case 12:
2418                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2421                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2422                                 break;
2423                         case 13:
2424                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2427                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2428                                 break;
2429                         case 14:
2430                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2432                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2433                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2434                                 break;
2435                         default:
2436                                 gb_tile_moden = 0;
2437                                 break;
2438                         }
2439                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2440                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441                 }
2442         } else if (num_pipe_configs == 4) {
2443                 if (num_rbs == 4) {
2444                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2445                                 switch (reg_offset) {
2446                                 case 0:
2447                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2449                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2450                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2451                                         break;
2452                                 case 1:
2453                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2455                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2456                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2457                                         break;
2458                                 case 2:
2459                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2463                                         break;
2464                                 case 3:
2465                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2467                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2469                                         break;
2470                                 case 4:
2471                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2473                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2474                                                          TILE_SPLIT(split_equal_to_row_size));
2475                                         break;
2476                                 case 5:
2477                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2478                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480                                         break;
2481                                 case 6:
2482                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2483                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2484                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2485                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2486                                         break;
2487                                 case 7:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2490                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491                                                          TILE_SPLIT(split_equal_to_row_size));
2492                                         break;
2493                                 case 8:
2494                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2495                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2496                                         break;
2497                                 case 9:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2499                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2500                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2501                                         break;
2502                                 case 10:
2503                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2505                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                                         break;
2508                                 case 11:
2509                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2511                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2512                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                                         break;
2514                                 case 12:
2515                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2516                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519                                         break;
2520                                 case 13:
2521                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2523                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2524                                         break;
2525                                 case 14:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 16:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2534                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2535                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2536                                         break;
2537                                 case 17:
2538                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2539                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2541                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542                                         break;
2543                                 case 27:
2544                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2546                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2547                                         break;
2548                                 case 28:
2549                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                                         break;
2554                                 case 29:
2555                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2557                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2558                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559                                         break;
2560                                 case 30:
2561                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2562                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2563                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565                                         break;
2566                                 default:
2567                                         gb_tile_moden = 0;
2568                                         break;
2569                                 }
2570                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2571                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2572                         }
2573                 } else if (num_rbs < 4) {
2574                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2575                                 switch (reg_offset) {
2576                                 case 0:
2577                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2579                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2580                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2581                                         break;
2582                                 case 1:
2583                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2587                                         break;
2588                                 case 2:
2589                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2592                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2593                                         break;
2594                                 case 3:
2595                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2598                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2599                                         break;
2600                                 case 4:
2601                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2603                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604                                                          TILE_SPLIT(split_equal_to_row_size));
2605                                         break;
2606                                 case 5:
2607                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2608                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2609                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                                         break;
2611                                 case 6:
2612                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2613                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2614                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2616                                         break;
2617                                 case 7:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          TILE_SPLIT(split_equal_to_row_size));
2622                                         break;
2623                                 case 8:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2626                                         break;
2627                                 case 9:
2628                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2629                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2630                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2631                                         break;
2632                                 case 10:
2633                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2635                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                                         break;
2638                                 case 11:
2639                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2640                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2641                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2642                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                                         break;
2644                                 case 12:
2645                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2646                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2647                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2648                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649                                         break;
2650                                 case 13:
2651                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2652                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2654                                         break;
2655                                 case 14:
2656                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2660                                         break;
2661                                 case 16:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 17:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 27:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2675                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2677                                         break;
2678                                 case 28:
2679                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2681                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2683                                         break;
2684                                 case 29:
2685                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2688                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689                                         break;
2690                                 case 30:
2691                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2693                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695                                         break;
2696                                 default:
2697                                         gb_tile_moden = 0;
2698                                         break;
2699                                 }
2700                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2701                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2702                         }
2703                 }
2704                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2705                         switch (reg_offset) {
2706                         case 0:
2707                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2709                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2711                                 break;
2712                         case 1:
2713                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2715                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2717                                 break;
2718                         case 2:
2719                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2723                                 break;
2724                         case 3:
2725                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2728                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2729                                 break;
2730                         case 4:
2731                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2735                                 break;
2736                         case 5:
2737                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2741                                 break;
2742                         case 6:
2743                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2746                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2747                                 break;
2748                         case 8:
2749                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                                 break;
2754                         case 9:
2755                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2756                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                                 break;
2760                         case 10:
2761                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2763                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2764                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2765                                 break;
2766                         case 11:
2767                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2769                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2771                                 break;
2772                         case 12:
2773                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2776                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2777                                 break;
2778                         case 13:
2779                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2782                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2783                                 break;
2784                         case 14:
2785                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2788                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2789                                 break;
2790                         default:
2791                                 gb_tile_moden = 0;
2792                                 break;
2793                         }
2794                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2795                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2796                 }
2797         } else if (num_pipe_configs == 2) {
2798                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2799                         switch (reg_offset) {
2800                         case 0:
2801                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2803                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2804                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2805                                 break;
2806                         case 1:
2807                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2809                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2810                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2811                                 break;
2812                         case 2:
2813                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2815                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2816                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2817                                 break;
2818                         case 3:
2819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2822                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2823                                 break;
2824                         case 4:
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2827                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2828                                                  TILE_SPLIT(split_equal_to_row_size));
2829                                 break;
2830                         case 5:
2831                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2832                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2833                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834                                 break;
2835                         case 6:
2836                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2837                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2838                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2839                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2840                                 break;
2841                         case 7:
2842                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2843                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2844                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2845                                                  TILE_SPLIT(split_equal_to_row_size));
2846                                 break;
2847                         case 8:
2848                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2849                                                 PIPE_CONFIG(ADDR_SURF_P2);
2850                                 break;
2851                         case 9:
2852                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854                                                  PIPE_CONFIG(ADDR_SURF_P2));
2855                                 break;
2856                         case 10:
2857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2859                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2860                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861                                 break;
2862                         case 11:
2863                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2865                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2866                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                                 break;
2868                         case 12:
2869                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2870                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2871                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2872                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873                                 break;
2874                         case 13:
2875                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2877                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2878                                 break;
2879                         case 14:
2880                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2883                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2884                                 break;
2885                         case 16:
2886                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2889                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2890                                 break;
2891                         case 17:
2892                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2893                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2895                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                                 break;
2897                         case 27:
2898                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2899                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                                  PIPE_CONFIG(ADDR_SURF_P2));
2901                                 break;
2902                         case 28:
2903                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2904                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2905                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2906                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907                                 break;
2908                         case 29:
2909                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2911                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2912                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913                                 break;
2914                         case 30:
2915                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2917                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2918                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919                                 break;
2920                         default:
2921                                 gb_tile_moden = 0;
2922                                 break;
2923                         }
2924                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2925                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2926                 }
2927                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2928                         switch (reg_offset) {
2929                         case 0:
2930                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2931                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2934                                 break;
2935                         case 1:
2936                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2937                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2938                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2939                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2940                                 break;
2941                         case 2:
2942                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2944                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2946                                 break;
2947                         case 3:
2948                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2952                                 break;
2953                         case 4:
2954                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2958                                 break;
2959                         case 5:
2960                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2962                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2964                                 break;
2965                         case 6:
2966                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2968                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2969                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2970                                 break;
2971                         case 8:
2972                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2973                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2974                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2976                                 break;
2977                         case 9:
2978                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2979                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2982                                 break;
2983                         case 10:
2984                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2988                                 break;
2989                         case 11:
2990                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2991                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2992                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2993                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2994                                 break;
2995                         case 12:
2996                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3000                                 break;
3001                         case 13:
3002                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3004                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3006                                 break;
3007                         case 14:
3008                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3012                                 break;
3013                         default:
3014                                 gb_tile_moden = 0;
3015                                 break;
3016                         }
3017                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3018                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3019                 }
3020         } else
3021                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3022 }
3023
3024 /**
3025  * cik_select_se_sh - select which SE, SH to address
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: shader engine to address
3029  * @sh_num: sh block to address
3030  *
3031  * Select which SE, SH combinations to address. Certain
3032  * registers are instanced per SE or SH.  0xffffffff means
3033  * broadcast to all SEs or SHs (CIK).
3034  */
3035 static void cik_select_se_sh(struct radeon_device *rdev,
3036                              u32 se_num, u32 sh_num)
3037 {
3038         u32 data = INSTANCE_BROADCAST_WRITES;
3039
3040         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042         else if (se_num == 0xffffffff)
3043                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044         else if (sh_num == 0xffffffff)
3045                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046         else
3047                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048         WREG32(GRBM_GFX_INDEX, data);
3049 }
3050
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a contiguous bitmask with @bit_width low bits set (CIK).
 * Returns the bitmask; widths of 32 or more saturate to all ones,
 * matching what the previous bit-by-bit loop produced.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* (1u << 32) is undefined behavior in C, so handle wide
	 * requests explicitly rather than shifting by the type width. */
	if (bit_width >= 32)
		return 0xffffffff;
	return (1u << bit_width) - 1;
}
3069
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine)
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently selected
 * SE/SH (set up via cik_select_se_sh) (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* Hardware (fused-off) disable mask is only honored when bit 0
	 * of the register is set; otherwise treat it as "none disabled". */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* Merge in RBs disabled by driver/user configuration */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* Keep only the bits for RBs that can exist on one SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3100
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE
 *
 * Configures per-SE/SH RB registers (CIK).  Builds a packed bitmap of
 * disabled RBs, caches the complementary enabled mask, and programs
 * PA_SC_RASTER_CONFIG on every SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH and pack its disabled-RB bits into one bitmap.
	 * Hawaii uses a different per-SH bitmap width than other CIK parts. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast so subsequent writes reach all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the disabled bitmap into the enabled one */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Cache the enabled-RB mask for use elsewhere in the driver */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program the raster config per SE; enabled_rbs is consumed two
	 * bits at a time, one pair of RBs per SH iteration. */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* both RBs of this pair disabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of this pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Leave the GRBM index in broadcast mode on exit */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	/* NOTE(review): the two-bit consumption assumes two RBs per SH --
	 * confirm against max_rb_num_per_se for differently-shaped parts. */
}
3169
3170 /**
3171  * cik_gpu_init - setup the 3D engine
3172  *
3173  * @rdev: radeon_device pointer
3174  *
3175  * Configures the 3D engine and tiling configuration
3176  * registers so that the 3D engine is usable.
3177  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family GPU topology limits and the "golden" GB_ADDR_CONFIG value */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* CU and RB counts vary by Kaveri SKU, keyed off the PCI
		 * device id (presumably 8/6/4/3-CU variants — see the PCI
		 * id list for the exact marketing names).
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* 32 entries, 0x18 bytes apart, 5 dwords each zeroed; raw offsets
	 * with no named defines — NOTE(review): presumably the HDP tiling
	 * table, matching the equivalent init on earlier radeon asics.
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* allow the BIF to read/write the framebuffer */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive memory row size (in KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that decodes addresses */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	/* SDMA engines only take the low config bits (mask 0x70) */
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* DB_DEBUG2/3 and CB_HW_CONTROL: magic field updates with no named
	 * defines — NOTE(review): values presumably come from AMD's
	 * recommended golden settings; do not change without hw docs.
	 */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan converter FIFO sizes chosen per family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* make HDP flushes also invalidate its cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: presumably a latch/commit of the
	 * host path config — NOTE(review): same pattern exists on prior asics.
	 */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the config settle before anyone touches the engine */
	udelay(50);
}
3433
3434 /*
3435  * GPU scratch registers helpers function.
3436  */
3437 /**
3438  * cik_scratch_init - setup driver info for CP scratch regs
3439  *
3440  * @rdev: radeon_device pointer
3441  *
3442  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3444  * is not used by default on newer asics (r6xx+).  On newer asics,
3445  * memory buffers are used for fences rather than scratch regs.
3446  */
3447 static void cik_scratch_init(struct radeon_device *rdev)
3448 {
3449         int i;
3450
3451         rdev->scratch.num_reg = 7;
3452         rdev->scratch.reg_base = SCRATCH_REG0;
3453         for (i = 0; i < rdev->scratch.num_reg; i++) {
3454                 rdev->scratch.free[i] = true;
3455                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3456         }
3457 }
3458
3459 /**
3460  * cik_ring_test - basic gfx ring test
3461  *
3462  * @rdev: radeon_device pointer
3463  * @ring: radeon_ring structure holding ring information
3464  *
3465  * Allocate a scratch register and write to it using the gfx ring (CIK).
3466  * Provides a basic gfx ring test to verify that the ring is working.
3467  * Used by cik_cp_gfx_resume();
3468  * Returns 0 on success, error on failure.
3469  */
3470 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3471 {
3472         uint32_t scratch;
3473         uint32_t tmp = 0;
3474         unsigned i;
3475         int r;
3476
3477         r = radeon_scratch_get(rdev, &scratch);
3478         if (r) {
3479                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3480                 return r;
3481         }
3482         WREG32(scratch, 0xCAFEDEAD);
3483         r = radeon_ring_lock(rdev, ring, 3);
3484         if (r) {
3485                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3486                 radeon_scratch_free(rdev, scratch);
3487                 return r;
3488         }
3489         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3490         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3491         radeon_ring_write(ring, 0xDEADBEEF);
3492         radeon_ring_unlock_commit(rdev, ring);
3493
3494         for (i = 0; i < rdev->usec_timeout; i++) {
3495                 tmp = RREG32(scratch);
3496                 if (tmp == 0xDEADBEEF)
3497                         break;
3498                 DRM_UDELAY(1);
3499         }
3500         if (i < rdev->usec_timeout) {
3501                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3502         } else {
3503                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3504                           ring->idx, scratch, tmp);
3505                 r = -EINVAL;
3506         }
3507         radeon_scratch_free(rdev, scratch);
3508         return r;
3509 }
3510
3511 /**
3512  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3513  *
3514  * @rdev: radeon_device pointer
3515  * @ridx: radeon ring index
3516  *
3517  * Emits an hdp flush on the cp.
3518  */
3519 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3520                                        int ridx)
3521 {
3522         struct radeon_ring *ring = &rdev->ring[ridx];
3523         u32 ref_and_mask;
3524
3525         switch (ring->idx) {
3526         case CAYMAN_RING_TYPE_CP1_INDEX:
3527         case CAYMAN_RING_TYPE_CP2_INDEX:
3528         default:
3529                 switch (ring->me) {
3530                 case 0:
3531                         ref_and_mask = CP2 << ring->pipe;
3532                         break;
3533                 case 1:
3534                         ref_and_mask = CP6 << ring->pipe;
3535                         break;
3536                 default:
3537                         return;
3538                 }
3539                 break;
3540         case RADEON_RING_TYPE_GFX_INDEX:
3541                 ref_and_mask = CP0;
3542                 break;
3543         }
3544
3545         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3546         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3547                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3548                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3549         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3550         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3551         radeon_ring_write(ring, ref_and_mask);
3552         radeon_ring_write(ring, ref_and_mask);
3553         radeon_ring_write(ring, 0x20); /* poll interval */
3554 }
3555
3556 /**
3557  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3558  *
3559  * @rdev: radeon_device pointer
3560  * @fence: radeon fence object
3561  *
 * Emits a fence sequence number on the gfx ring and flushes
3563  * GPU caches.
3564  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* low address bits, dword-aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* high 16 addr bits + DATA_SEL/INT_SEL controls — NOTE(review):
	 * presumably "write seq value" / "irq on write confirm"; see cikd.h */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3584
3585 /**
3586  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3587  *
3588  * @rdev: radeon_device pointer
3589  * @fence: radeon fence object
3590  *
 * Emits a fence sequence number on the compute ring and flushes
3592  * GPU caches.
3593  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	/* Note: unlike the gfx EVENT_WRITE_EOP path, RELEASE_MEM carries the
	 * select dword before the address and takes the full upper 32 bits. */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3614
3615 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3616                              struct radeon_ring *ring,
3617                              struct radeon_semaphore *semaphore,
3618                              bool emit_wait)
3619 {
3620         uint64_t addr = semaphore->gpu_addr;
3621         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3622
3623         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3624         radeon_ring_write(ring, addr & 0xffffffff);
3625         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3626
3627         return true;
3628 }
3629
3630 /**
3631  * cik_copy_cpdma - copy pages using the CP DMA engine
3632  *
3633  * @rdev: radeon_device pointer
3634  * @src_offset: src GPU address
3635  * @dst_offset: dst GPU address
3636  * @num_gpu_pages: number of GPU pages to xfer
3637  * @fence: radeon fence object
3638  *
3639  * Copy GPU paging using the CP DMA engine (CIK+).
3640  * Used by the radeon ttm implementation to move pages if
3641  * registered as the asic copy callback.
3642  */
3643 int cik_copy_cpdma(struct radeon_device *rdev,
3644                    uint64_t src_offset, uint64_t dst_offset,
3645                    unsigned num_gpu_pages,
3646                    struct radeon_fence **fence)
3647 {
3648         struct radeon_semaphore *sem = NULL;
3649         int ring_index = rdev->asic->copy.blit_ring_index;
3650         struct radeon_ring *ring = &rdev->ring[ring_index];
3651         u32 size_in_bytes, cur_size_in_bytes, control;
3652         int i, num_loops;
3653         int r = 0;
3654
3655         r = radeon_semaphore_create(rdev, &sem);
3656         if (r) {
3657                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3658                 return r;
3659         }
3660
3661         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3662         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3663         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3664         if (r) {
3665                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3666                 radeon_semaphore_free(rdev, &sem, NULL);
3667                 return r;
3668         }
3669
3670         radeon_semaphore_sync_to(sem, *fence);
3671         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3672
3673         for (i = 0; i < num_loops; i++) {
3674                 cur_size_in_bytes = size_in_bytes;
3675                 if (cur_size_in_bytes > 0x1fffff)
3676                         cur_size_in_bytes = 0x1fffff;
3677                 size_in_bytes -= cur_size_in_bytes;
3678                 control = 0;
3679                 if (size_in_bytes == 0)
3680                         control |= PACKET3_DMA_DATA_CP_SYNC;
3681                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3682                 radeon_ring_write(ring, control);
3683                 radeon_ring_write(ring, lower_32_bits(src_offset));
3684                 radeon_ring_write(ring, upper_32_bits(src_offset));
3685                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3686                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3687                 radeon_ring_write(ring, cur_size_in_bytes);
3688                 src_offset += cur_size_in_bytes;
3689                 dst_offset += cur_size_in_bytes;
3690         }
3691
3692         r = radeon_fence_emit(rdev, fence, ring->idx);
3693         if (r) {
3694                 radeon_ring_unlock_undo(rdev, ring);
3695                 return r;
3696         }
3697
3698         radeon_ring_unlock_commit(rdev, ring);
3699         radeon_semaphore_free(rdev, &sem, *fence);
3700
3701         return r;
3702 }
3703
3704 /*
3705  * IB stuff
3706  */
3707 /**
3708  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709  *
3710  * @rdev: radeon_device pointer
3711  * @ib: radeon indirect buffer object
3712  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3717  * on the gfx ring for execution by the GPU.
3718  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* Record where the rptr will be after this IB so GPU hangs can
		 * be diagnosed: via a dedicated save register if available,
		 * otherwise via the writeback buffer.  The +3+4 / +5+4 offsets
		 * account for the dwords this preamble and the IB packet emit.
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			/* NOTE(review): the & 0xffffffff mask here is a no-op */
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length plus the VM id (0 when no per-process VM is attached) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3762
3763 /**
3764  * cik_ib_test - basic gfx ring IB test
3765  *
3766  * @rdev: radeon_device pointer
3767  * @ring: radeon_ring structure holding ring information
3768  *
3769  * Allocate an IB and execute it on the gfx ring (CIK).
3770  * Provides a basic gfx ring test to verify that IBs are working.
3771  * Returns 0 on success, error on failure.
3772  */
3773 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3774 {
3775         struct radeon_ib ib;
3776         uint32_t scratch;
3777         uint32_t tmp = 0;
3778         unsigned i;
3779         int r;
3780
3781         r = radeon_scratch_get(rdev, &scratch);
3782         if (r) {
3783                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3784                 return r;
3785         }
3786         WREG32(scratch, 0xCAFEDEAD);
3787         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3788         if (r) {
3789                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3790                 radeon_scratch_free(rdev, scratch);
3791                 return r;
3792         }
3793         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3794         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3795         ib.ptr[2] = 0xDEADBEEF;
3796         ib.length_dw = 3;
3797         r = radeon_ib_schedule(rdev, &ib, NULL);
3798         if (r) {
3799                 radeon_scratch_free(rdev, scratch);
3800                 radeon_ib_free(rdev, &ib);
3801                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3802                 return r;
3803         }
3804         r = radeon_fence_wait(ib.fence, false);
3805         if (r) {
3806                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3807                 radeon_scratch_free(rdev, scratch);
3808                 radeon_ib_free(rdev, &ib);
3809                 return r;
3810         }
3811         for (i = 0; i < rdev->usec_timeout; i++) {
3812                 tmp = RREG32(scratch);
3813                 if (tmp == 0xDEADBEEF)
3814                         break;
3815                 DRM_UDELAY(1);
3816         }
3817         if (i < rdev->usec_timeout) {
3818                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3819         } else {
3820                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3821                           scratch, tmp);
3822                 r = -EINVAL;
3823         }
3824         radeon_scratch_free(rdev, scratch);
3825         radeon_ib_free(rdev, &ib);
3826         return r;
3827 }
3828
3829 /*
3830  * CP.
 * On CIK, gfx and compute now have independent command processors.
3832  *
3833  * GFX
3834  * Gfx consists of a single ring and can process both gfx jobs and
3835  * compute jobs.  The gfx CP consists of three microengines (ME):
3836  * PFP - Pre-Fetch Parser
3837  * ME - Micro Engine
3838  * CE - Constant Engine
3839  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3841  * used by the DE so that they can be loaded into cache in parallel
3842  * while the DE is processing state update packets.
3843  *
3844  * Compute
3845  * The compute CP consists of two microengines (ME):
3846  * MEC1 - Compute MicroEngine 1
3847  * MEC2 - Compute MicroEngine 2
3848  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3849  * The queues are exposed to userspace and are programmed directly
3850  * by the compute runtime.
3851  */
3852 /**
3853  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3854  *
3855  * @rdev: radeon_device pointer
3856  * @enable: enable or disable the MEs
3857  *
3858  * Halts or unhalts the gfx MEs.
3859  */
3860 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3861 {
3862         if (enable)
3863                 WREG32(CP_ME_CNTL, 0);
3864         else {
3865                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3866                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3867                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3868                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3869         }
3870         udelay(50);
3871 }
3872
3873 /**
3874  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3875  *
3876  * @rdev: radeon_device pointer
3877  *
3878  * Loads the gfx PFP, ME, and CE ucode.
3879  * Returns 0 for success, -EINVAL if the ucode is not available.
3880  */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx CP images must have been fetched by the fw loader */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* the MEs must be halted while their ucode RAM is rewritten */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: reset write address, stream in the big-endian image, reset again */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all ucode address registers cleared */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3918
3919 /**
3920  * cik_cp_gfx_start - start the gfx ring
3921  *
3922  * @rdev: radeon_device pointer
3923  *
3924  * Enables the ring and loads the clear state context and other
3925  * packets required to init the ring.
3926  * Returns 0 for success, error for failure.
3927  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the clear state plus 17 dwords of init packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state generated from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* 0x316 is the context-register offset of VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3979
3980 /**
3981  * cik_cp_gfx_fini - stop the gfx ring
3982  *
3983  * @rdev: radeon_device pointer
3984  *
3985  * Stop the gfx ring and tear down the driver ring
3986  * info.
3987  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs, then release the ring's driver-side resources */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3993
3994 /**
3995  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3996  *
3997  * @rdev: radeon_device pointer
3998  *
3999  * Program the location and size of the gfx ring buffer
4000  * and test it to make sure it's working.
4001  * Returns 0 for success, error for failure.
4002  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* register not present on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in quad-dwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx is up again: expose the full VRAM size to TTM for copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4069
4070 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4071                      struct radeon_ring *ring)
4072 {
4073         u32 rptr;
4074
4075         if (rdev->wb.enabled)
4076                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4077         else
4078                 rptr = RREG32(CP_RB0_RPTR);
4079
4080         return rptr;
4081 }
4082
4083 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4084                      struct radeon_ring *ring)
4085 {
4086         u32 wptr;
4087
4088         wptr = RREG32(CP_RB0_WPTR);
4089
4090         return wptr;
4091 }
4092
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted write so the CP sees it promptly */
	(void)RREG32(CP_RB0_WPTR);
}
4099
4100 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4101                          struct radeon_ring *ring)
4102 {
4103         u32 rptr;
4104
4105         if (rdev->wb.enabled) {
4106                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4107         } else {
4108                 mutex_lock(&rdev->srbm_mutex);
4109                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4110                 rptr = RREG32(CP_HQD_PQ_RPTR);
4111                 cik_srbm_select(rdev, 0, 0, 0, 0);
4112                 mutex_unlock(&rdev->srbm_mutex);
4113         }
4114
4115         return rptr;
4116 }
4117
4118 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4119                          struct radeon_ring *ring)
4120 {
4121         u32 wptr;
4122
4123         if (rdev->wb.enabled) {
4124                 /* XXX check if swapping is necessary on BE */
4125                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4126         } else {
4127                 mutex_lock(&rdev->srbm_mutex);
4128                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4129                 wptr = RREG32(CP_HQD_PQ_WPTR);
4130                 cik_srbm_select(rdev, 0, 0, 0, 0);
4131                 mutex_unlock(&rdev->srbm_mutex);
4132         }
4133
4134         return wptr;
4135 }
4136
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* publish the new wptr to the writeback shadow, then ring the
	 * queue's doorbell so the MEC picks it up */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4144
4145 /**
4146  * cik_cp_compute_enable - enable/disable the compute CP MEs
4147  *
4148  * @rdev: radeon_device pointer
4149  * @enable: enable or disable the MEs
4150  *
4151  * Halts or unhalts the compute MEs.
4152  */
4153 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4154 {
4155         if (enable)
4156                 WREG32(CP_MEC_CNTL, 0);
4157         else {
4158                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4159                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4160                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4161         }
4162         udelay(50);
4163 }
4164
4165 /**
4166  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4167  *
4168  * @rdev: radeon_device pointer
4169  *
4170  * Loads the compute MEC1&2 ucode.
4171  * Returns 0 for success, -EINVAL if the ucode is not available.
4172  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* the MECs must be halted while their ucode RAM is rewritten */
	cik_cp_compute_enable(rdev, false);

	/* MEC1: reset write address, stream in the big-endian image, reset again */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri has a second MEC; it runs the same image as MEC1 */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4201
4202 /**
4203  * cik_cp_compute_start - start the compute queues
4204  *
4205  * @rdev: radeon_device pointer
4206  *
4207  * Enable the compute queues.
4208  * Returns 0 for success, error for failure.
4209  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalting the MECs is all that is needed; the queues themselves
	 * are programmed in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4216
4217 /**
4218  * cik_cp_compute_fini - stop the compute queues
4219  *
4220  * @rdev: radeon_device pointer
4221  *
4222  * Stop the compute queues and tear down the driver queue
4223  * info.
4224  */
4225 static void cik_cp_compute_fini(struct radeon_device *rdev)
4226 {
4227         int i, idx, r;
4228
4229         cik_cp_compute_enable(rdev, false);
4230
4231         for (i = 0; i < 2; i++) {
4232                 if (i == 0)
4233                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4234                 else
4235                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4236
4237                 if (rdev->ring[idx].mqd_obj) {
4238                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4239                         if (unlikely(r != 0))
4240                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4241
4242                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4243                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4244
4245                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4246                         rdev->ring[idx].mqd_obj = NULL;
4247                 }
4248         }
4249 }
4250
/* Tear down the MEC HPD EOP buffer allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4266
4267 #define MEC_HPD_SIZE 2048
4268
4269 static int cik_mec_init(struct radeon_device *rdev)
4270 {
4271         int r;
4272         u32 *hpd;
4273
4274         /*
4275          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4276          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4277          */
4278         if (rdev->family == CHIP_KAVERI)
4279                 rdev->mec.num_mec = 2;
4280         else
4281                 rdev->mec.num_mec = 1;
4282         rdev->mec.num_pipe = 4;
4283         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4284
4285         if (rdev->mec.hpd_eop_obj == NULL) {
4286                 r = radeon_bo_create(rdev,
4287                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4288                                      PAGE_SIZE, true,
4289                                      RADEON_GEM_DOMAIN_GTT, NULL,
4290                                      &rdev->mec.hpd_eop_obj);
4291                 if (r) {
4292                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4293                         return r;
4294                 }
4295         }
4296
4297         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4298         if (unlikely(r != 0)) {
4299                 cik_mec_fini(rdev);
4300                 return r;
4301         }
4302         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4303                           &rdev->mec.hpd_eop_gpu_addr);
4304         if (r) {
4305                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4306                 cik_mec_fini(rdev);
4307                 return r;
4308         }
4309         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4310         if (r) {
4311                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4312                 cik_mec_fini(rdev);
4313                 return r;
4314         }
4315
4316         /* clear memory.  Not sure if this is required or not */
4317         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4318
4319         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4320         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4321
4322         return 0;
4323 }
4324
/* Shadow of the CP_HQD_*/CP_MQD_* register block as it is laid out inside
 * the MQD.  Field order mirrors the hardware register order; do not reorder.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4363
/* Memory Queue Descriptor for CIK compute queues.  The layout is consumed
 * by the MEC firmware, so it must not be changed; queue_state holds the
 * register shadow the HQD is programmed from.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4391
4392 /**
4393  * cik_cp_compute_resume - setup the compute queue registers
4394  *
4395  * @rdev: radeon_device pointer
4396  *
4397  * Program the compute queues and test them to make sure they
4398  * are working.
4399  * Returns 0 for success, error for failure.
4400  */
4401 static int cik_cp_compute_resume(struct radeon_device *rdev)
4402 {
4403         int r, i, idx;
4404         u32 tmp;
4405         bool use_doorbell = true;
4406         u64 hqd_gpu_addr;
4407         u64 mqd_gpu_addr;
4408         u64 eop_gpu_addr;
4409         u64 wb_gpu_addr;
4410         u32 *buf;
4411         struct bonaire_mqd *mqd;
4412
4413         r = cik_cp_compute_start(rdev);
4414         if (r)
4415                 return r;
4416
4417         /* fix up chicken bits */
4418         tmp = RREG32(CP_CPF_DEBUG);
4419         tmp |= (1 << 23);
4420         WREG32(CP_CPF_DEBUG, tmp);
4421
4422         /* init the pipes */
4423         mutex_lock(&rdev->srbm_mutex);
4424         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4425                 int me = (i < 4) ? 1 : 2;
4426                 int pipe = (i < 4) ? i : (i - 4);
4427
4428                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4429
4430                 cik_srbm_select(rdev, me, pipe, 0, 0);
4431
4432                 /* write the EOP addr */
4433                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4434                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4435
4436                 /* set the VMID assigned */
4437                 WREG32(CP_HPD_EOP_VMID, 0);
4438
4439                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4440                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4441                 tmp &= ~EOP_SIZE_MASK;
4442                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4443                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4444         }
4445         cik_srbm_select(rdev, 0, 0, 0, 0);
4446         mutex_unlock(&rdev->srbm_mutex);
4447
4448         /* init the queues.  Just two for now. */
4449         for (i = 0; i < 2; i++) {
4450                 if (i == 0)
4451                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4452                 else
4453                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4454
4455                 if (rdev->ring[idx].mqd_obj == NULL) {
4456                         r = radeon_bo_create(rdev,
4457                                              sizeof(struct bonaire_mqd),
4458                                              PAGE_SIZE, true,
4459                                              RADEON_GEM_DOMAIN_GTT, NULL,
4460                                              &rdev->ring[idx].mqd_obj);
4461                         if (r) {
4462                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4463                                 return r;
4464                         }
4465                 }
4466
4467                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4468                 if (unlikely(r != 0)) {
4469                         cik_cp_compute_fini(rdev);
4470                         return r;
4471                 }
4472                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4473                                   &mqd_gpu_addr);
4474                 if (r) {
4475                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4476                         cik_cp_compute_fini(rdev);
4477                         return r;
4478                 }
4479                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4480                 if (r) {
4481                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4482                         cik_cp_compute_fini(rdev);
4483                         return r;
4484                 }
4485
4486                 /* init the mqd struct */
4487                 memset(buf, 0, sizeof(struct bonaire_mqd));
4488
4489                 mqd = (struct bonaire_mqd *)buf;
4490                 mqd->header = 0xC0310800;
4491                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4492                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4493                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4494                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4495
4496                 mutex_lock(&rdev->srbm_mutex);
4497                 cik_srbm_select(rdev, rdev->ring[idx].me,
4498                                 rdev->ring[idx].pipe,
4499                                 rdev->ring[idx].queue, 0);
4500
4501                 /* disable wptr polling */
4502                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4503                 tmp &= ~WPTR_POLL_EN;
4504                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4505
4506                 /* enable doorbell? */
4507                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4508                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4509                 if (use_doorbell)
4510                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4511                 else
4512                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4513                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4514                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4515
4516                 /* disable the queue if it's active */
4517                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4518                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4519                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4520                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4521                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4522                         for (i = 0; i < rdev->usec_timeout; i++) {
4523                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4524                                         break;
4525                                 udelay(1);
4526                         }
4527                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4528                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4529                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4530                 }
4531
4532                 /* set the pointer to the MQD */
4533                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4534                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4535                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4536                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4537                 /* set MQD vmid to 0 */
4538                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4539                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4540                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4541
4542                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4543                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4544                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4545                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4546                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4547                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4548
4549                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4550                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4551                 mqd->queue_state.cp_hqd_pq_control &=
4552                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4553
4554                 mqd->queue_state.cp_hqd_pq_control |=
4555                         order_base_2(rdev->ring[idx].ring_size / 8);
4556                 mqd->queue_state.cp_hqd_pq_control |=
4557                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4558 #ifdef __BIG_ENDIAN
4559                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4560 #endif
4561                 mqd->queue_state.cp_hqd_pq_control &=
4562                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4563                 mqd->queue_state.cp_hqd_pq_control |=
4564                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4565                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4566
4567                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4568                 if (i == 0)
4569                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4570                 else
4571                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4572                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4573                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4574                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4575                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4576                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4577
4578                 /* set the wb address wether it's enabled or not */
4579                 if (i == 0)
4580                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4581                 else
4582                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4583                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4584                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4585                         upper_32_bits(wb_gpu_addr) & 0xffff;
4586                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4587                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4588                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4589                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4590
4591                 /* enable the doorbell if requested */
4592                 if (use_doorbell) {
4593                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4594                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4595                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4596                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4597                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4598                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4599                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4600                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4601
4602                 } else {
4603                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4604                 }
4605                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4606                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4607
4608                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4609                 rdev->ring[idx].wptr = 0;
4610                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4611                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4612                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4613
4614                 /* set the vmid for the queue */
4615                 mqd->queue_state.cp_hqd_vmid = 0;
4616                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4617
4618                 /* activate the queue */
4619                 mqd->queue_state.cp_hqd_active = 1;
4620                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4621
4622                 cik_srbm_select(rdev, 0, 0, 0, 0);
4623                 mutex_unlock(&rdev->srbm_mutex);
4624
4625                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4626                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4627
4628                 rdev->ring[idx].ready = true;
4629                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4630                 if (r)
4631                         rdev->ring[idx].ready = false;
4632         }
4633
4634         return 0;
4635 }
4636
/* Enable or halt both the gfx and compute command processors together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4642
/* Load the gfx CP ucode, then the compute CP ucode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4656
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4662
/* Bring up both CPs: load ucode, then resume gfx and compute rings.
 * GUI idle interrupts are masked during the bring-up sequence.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4684
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used
 * when diagnosing GPU hangs before a soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4724
4725 /**
4726  * cik_gpu_check_soft_reset - check which blocks are busy
4727  *
4728  * @rdev: radeon_device pointer
4729  *
4730  * Check which blocks are busy and return the relevant reset
4731  * mask to be used by cik_gpu_soft_reset().
4732  * Returns a mask of the blocks to be reset.
4733  */
4734 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4735 {
4736         u32 reset_mask = 0;
4737         u32 tmp;
4738
4739         /* GRBM_STATUS */
4740         tmp = RREG32(GRBM_STATUS);
4741         if (tmp & (PA_BUSY | SC_BUSY |
4742                    BCI_BUSY | SX_BUSY |
4743                    TA_BUSY | VGT_BUSY |
4744                    DB_BUSY | CB_BUSY |
4745                    GDS_BUSY | SPI_BUSY |
4746                    IA_BUSY | IA_BUSY_NO_DMA))
4747                 reset_mask |= RADEON_RESET_GFX;
4748
4749         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4750                 reset_mask |= RADEON_RESET_CP;
4751
4752         /* GRBM_STATUS2 */
4753         tmp = RREG32(GRBM_STATUS2);
4754         if (tmp & RLC_BUSY)
4755                 reset_mask |= RADEON_RESET_RLC;
4756
4757         /* SDMA0_STATUS_REG */
4758         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4759         if (!(tmp & SDMA_IDLE))
4760                 reset_mask |= RADEON_RESET_DMA;
4761
4762         /* SDMA1_STATUS_REG */
4763         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4764         if (!(tmp & SDMA_IDLE))
4765                 reset_mask |= RADEON_RESET_DMA1;
4766
4767         /* SRBM_STATUS2 */
4768         tmp = RREG32(SRBM_STATUS2);
4769         if (tmp & SDMA_BUSY)
4770                 reset_mask |= RADEON_RESET_DMA;
4771
4772         if (tmp & SDMA1_BUSY)
4773                 reset_mask |= RADEON_RESET_DMA1;
4774
4775         /* SRBM_STATUS */
4776         tmp = RREG32(SRBM_STATUS);
4777
4778         if (tmp & IH_BUSY)
4779                 reset_mask |= RADEON_RESET_IH;
4780
4781         if (tmp & SEM_BUSY)
4782                 reset_mask |= RADEON_RESET_SEM;
4783
4784         if (tmp & GRBM_RQ_PENDING)
4785                 reset_mask |= RADEON_RESET_GRBM;
4786
4787         if (tmp & VMC_BUSY)
4788                 reset_mask |= RADEON_RESET_VMC;
4789
4790         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4791                    MCC_BUSY | MCD_BUSY))
4792                 reset_mask |= RADEON_RESET_MC;
4793
4794         if (evergreen_is_display_hung(rdev))
4795                 reset_mask |= RADEON_RESET_DISPLAY;
4796
4797         /* Skip MC reset as it's mostly likely not hung, just busy */
4798         if (reset_mask & RADEON_RESET_MC) {
4799                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4800                 reset_mask &= ~RADEON_RESET_MC;
4801         }
4802
4803         return reset_mask;
4804 }
4805
4806 /**
4807  * cik_gpu_soft_reset - soft reset GPU
4808  *
4809  * @rdev: radeon_device pointer
4810  * @reset_mask: mask of which blocks to reset
4811  *
4812  * Soft reset the blocks specified in @reset_mask.
4813  */
4814 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4815 {
4816         struct evergreen_mc_save save;
4817         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4818         u32 tmp;
4819
4820         if (reset_mask == 0)
4821                 return;
4822
4823         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4824
4825         cik_print_gpu_status_regs(rdev);
4826         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4827                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4828         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4829                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4830
4831         /* disable CG/PG */
4832         cik_fini_pg(rdev);
4833         cik_fini_cg(rdev);
4834
4835         /* stop the rlc */
4836         cik_rlc_stop(rdev);
4837
4838         /* Disable GFX parsing/prefetching */
4839         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4840
4841         /* Disable MEC parsing/prefetching */
4842         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4843
4844         if (reset_mask & RADEON_RESET_DMA) {
4845                 /* sdma0 */
4846                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4847                 tmp |= SDMA_HALT;
4848                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4849         }
4850         if (reset_mask & RADEON_RESET_DMA1) {
4851                 /* sdma1 */
4852                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4853                 tmp |= SDMA_HALT;
4854                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4855         }
4856
4857         evergreen_mc_stop(rdev, &save);
4858         if (evergreen_mc_wait_for_idle(rdev)) {
4859                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4860         }
4861
4862         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4863                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4864
4865         if (reset_mask & RADEON_RESET_CP) {
4866                 grbm_soft_reset |= SOFT_RESET_CP;
4867
4868                 srbm_soft_reset |= SOFT_RESET_GRBM;
4869         }
4870
4871         if (reset_mask & RADEON_RESET_DMA)
4872                 srbm_soft_reset |= SOFT_RESET_SDMA;
4873
4874         if (reset_mask & RADEON_RESET_DMA1)
4875                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4876
4877         if (reset_mask & RADEON_RESET_DISPLAY)
4878                 srbm_soft_reset |= SOFT_RESET_DC;
4879
4880         if (reset_mask & RADEON_RESET_RLC)
4881                 grbm_soft_reset |= SOFT_RESET_RLC;
4882
4883         if (reset_mask & RADEON_RESET_SEM)
4884                 srbm_soft_reset |= SOFT_RESET_SEM;
4885
4886         if (reset_mask & RADEON_RESET_IH)
4887                 srbm_soft_reset |= SOFT_RESET_IH;
4888
4889         if (reset_mask & RADEON_RESET_GRBM)
4890                 srbm_soft_reset |= SOFT_RESET_GRBM;
4891
4892         if (reset_mask & RADEON_RESET_VMC)
4893                 srbm_soft_reset |= SOFT_RESET_VMC;
4894
4895         if (!(rdev->flags & RADEON_IS_IGP)) {
4896                 if (reset_mask & RADEON_RESET_MC)
4897                         srbm_soft_reset |= SOFT_RESET_MC;
4898         }
4899
4900         if (grbm_soft_reset) {
4901                 tmp = RREG32(GRBM_SOFT_RESET);
4902                 tmp |= grbm_soft_reset;
4903                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4904                 WREG32(GRBM_SOFT_RESET, tmp);
4905                 tmp = RREG32(GRBM_SOFT_RESET);
4906
4907                 udelay(50);
4908
4909                 tmp &= ~grbm_soft_reset;
4910                 WREG32(GRBM_SOFT_RESET, tmp);
4911                 tmp = RREG32(GRBM_SOFT_RESET);
4912         }
4913
4914         if (srbm_soft_reset) {
4915                 tmp = RREG32(SRBM_SOFT_RESET);
4916                 tmp |= srbm_soft_reset;
4917                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4918                 WREG32(SRBM_SOFT_RESET, tmp);
4919                 tmp = RREG32(SRBM_SOFT_RESET);
4920
4921                 udelay(50);
4922
4923                 tmp &= ~srbm_soft_reset;
4924                 WREG32(SRBM_SOFT_RESET, tmp);
4925                 tmp = RREG32(SRBM_SOFT_RESET);
4926         }
4927
4928         /* Wait a little for things to settle down */
4929         udelay(50);
4930
4931         evergreen_mc_resume(rdev, &save);
4932         udelay(50);
4933
4934         cik_print_gpu_status_regs(rdev);
4935 }
4936
/* GMCON register state saved across a KV (APU) pci config reset;
 * see kv_save_regs_for_reset()/kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
4942
/**
 * kv_save_regs_for_reset - save GMCON state before a KV reset
 *
 * @rdev: radeon_device pointer
 * @save: buffer receiving the current GMCON register values
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then clears the
 * RENG execute-on-power-up, execute-on-reg-update and stutter
 * bits so they do not fire during the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4954
4955 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4956                                       struct kv_reset_save_regs *save)
4957 {
4958         int i;
4959
4960         WREG32(GMCON_PGFSM_WRITE, 0);
4961         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4962
4963         for (i = 0; i < 5; i++)
4964                 WREG32(GMCON_PGFSM_WRITE, 0);
4965
4966         WREG32(GMCON_PGFSM_WRITE, 0);
4967         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4968
4969         for (i = 0; i < 5; i++)
4970                 WREG32(GMCON_PGFSM_WRITE, 0);
4971
4972         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4973         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4974
4975         for (i = 0; i < 5; i++)
4976                 WREG32(GMCON_PGFSM_WRITE, 0);
4977
4978         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4979         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4980
4981         for (i = 0; i < 5; i++)
4982                 WREG32(GMCON_PGFSM_WRITE, 0);
4983
4984         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4985         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4986
4987         for (i = 0; i < 5; i++)
4988                 WREG32(GMCON_PGFSM_WRITE, 0);
4989
4990         WREG32(GMCON_PGFSM_WRITE, 0);
4991         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4992
4993         for (i = 0; i < 5; i++)
4994                 WREG32(GMCON_PGFSM_WRITE, 0);
4995
4996         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4997         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4998
4999         for (i = 0; i < 5; i++)
5000                 WREG32(GMCON_PGFSM_WRITE, 0);
5001
5002         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5003         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5004
5005         for (i = 0; i < 5; i++)
5006                 WREG32(GMCON_PGFSM_WRITE, 0);
5007
5008         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5009         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5010
5011         for (i = 0; i < 5; i++)
5012                 WREG32(GMCON_PGFSM_WRITE, 0);
5013
5014         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5015         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5016
5017         for (i = 0; i < 5; i++)
5018                 WREG32(GMCON_PGFSM_WRITE, 0);
5019
5020         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5021         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5022
5023         WREG32(GMCON_MISC3, save->gmcon_misc3);
5024         WREG32(GMCON_MISC, save->gmcon_misc);
5025         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5026 }
5027
/**
 * cik_gpu_pci_config_reset - reset the asic via a pci config reset
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines and memory access, saves APU-specific GMCON
 * state, disables bus mastering and triggers a PCI config space
 * reset, then waits for the asic to come back (CONFIG_MEMSIZE
 * reading something other than all-ones) and restores the GMCON
 * state on APUs.  Used as the big hammer when soft reset fails.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads 0xffffffff while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5090
5091 /**
5092  * cik_asic_reset - soft reset GPU
5093  *
5094  * @rdev: radeon_device pointer
5095  *
5096  * Look up which blocks are hung and attempt
5097  * to reset them.
5098  * Returns 0 for success.
5099  */
5100 int cik_asic_reset(struct radeon_device *rdev)
5101 {
5102         u32 reset_mask;
5103
5104         reset_mask = cik_gpu_check_soft_reset(rdev);
5105
5106         if (reset_mask)
5107                 r600_set_bios_scratch_engine_hung(rdev, true);
5108
5109         /* try soft reset */
5110         cik_gpu_soft_reset(rdev, reset_mask);
5111
5112         reset_mask = cik_gpu_check_soft_reset(rdev);
5113
5114         /* try pci config reset */
5115         if (reset_mask && radeon_hard_reset)
5116                 cik_gpu_pci_config_reset(rdev);
5117
5118         reset_mask = cik_gpu_check_soft_reset(rdev);
5119
5120         if (!reset_mask)
5121                 r600_set_bios_scratch_engine_hung(rdev, false);
5122
5123         return 0;
5124 }
5125
5126 /**
5127  * cik_gfx_is_lockup - check if the 3D engine is locked up
5128  *
5129  * @rdev: radeon_device pointer
5130  * @ring: radeon_ring structure holding ring information
5131  *
5132  * Check if the 3D engine is locked up (CIK).
5133  * Returns true if the engine is locked, false if not.
5134  */
5135 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5136 {
5137         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5138
5139         if (!(reset_mask & (RADEON_RESET_GFX |
5140                             RADEON_RESET_COMPUTE |
5141                             RADEON_RESET_CP))) {
5142                 radeon_ring_lockup_update(rdev, ring);
5143                 return false;
5144         }
5145         return radeon_ring_test_lockup(rdev, ring);
5146 }
5147
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce memory clients while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* pack vram end/start into 16-bit fields (>> 24: 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* NOTE(review): AGP values look like a disabled aperture - confirm */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5204
5205 /**
5206  * cik_mc_init - initialize the memory controller driver params
5207  *
5208  * @rdev: radeon_device pointer
5209  *
5210  * Look up the amount of vram, vram width, and decide how to place
5211  * vram and gart within the GPU's physical address space (CIK).
5212  * Returns 0 for success.
5213  */
5214 static int cik_mc_init(struct radeon_device *rdev)
5215 {
5216         u32 tmp;
5217         int chansize, numchan;
5218
5219         /* Get VRAM informations */
5220         rdev->mc.vram_is_ddr = true;
5221         tmp = RREG32(MC_ARB_RAMCFG);
5222         if (tmp & CHANSIZE_MASK) {
5223                 chansize = 64;
5224         } else {
5225                 chansize = 32;
5226         }
5227         tmp = RREG32(MC_SHARED_CHMAP);
5228         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5229         case 0:
5230         default:
5231                 numchan = 1;
5232                 break;
5233         case 1:
5234                 numchan = 2;
5235                 break;
5236         case 2:
5237                 numchan = 4;
5238                 break;
5239         case 3:
5240                 numchan = 8;
5241                 break;
5242         case 4:
5243                 numchan = 3;
5244                 break;
5245         case 5:
5246                 numchan = 6;
5247                 break;
5248         case 6:
5249                 numchan = 10;
5250                 break;
5251         case 7:
5252                 numchan = 12;
5253                 break;
5254         case 8:
5255                 numchan = 16;
5256                 break;
5257         }
5258         rdev->mc.vram_width = numchan * chansize;
5259         /* Could aper size report 0 ? */
5260         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5261         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5262         /* size in MB on si */
5263         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5264         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5265         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5266         si_vram_gtt_location(rdev, &rdev->mc);
5267         radeon_update_bandwidth_info(rdev);
5268
5269         return 0;
5270 }
5271
5272 /*
5273  * GART
5274  * VMID 0 is the physical GPU addresses as used by the kernel.
5275  * VMIDs 1-15 are used for userspace clients and are handled
5276  * by the radeon vm/hsa code.
5277  */
5278 /**
5279  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5280  *
5281  * @rdev: radeon_device pointer
5282  *
5283  * Flush the TLB for the VMID 0 page table (CIK).
5284  */
5285 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5286 {
5287         /* flush hdp cache */
5288         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5289
5290         /* bits 0-15 are the VM contexts0-15 */
5291         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5292 }
5293
5294 /**
5295  * cik_pcie_gart_enable - gart enable
5296  *
5297  * @rdev: radeon_device pointer
5298  *
5299  * This sets up the TLBs, programs the page tables for VMID0,
5300  * sets up the hw for VMIDs 1-15 which are allocated on
5301  * demand, and sets up the global locations for the LDS, GDS,
5302  * and GPUVM for FSA64 clients (CIK).
5303  * Returns 0 for success, errors for failure.
5304  */
5305 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5306 {
5307         int r, i;
5308
5309         if (rdev->gart.robj == NULL) {
5310                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5311                 return -EINVAL;
5312         }
5313         r = radeon_gart_table_vram_pin(rdev);
5314         if (r)
5315                 return r;
5316         radeon_gart_restore(rdev);
5317         /* Setup TLB control */
5318         WREG32(MC_VM_MX_L1_TLB_CNTL,
5319                (0xA << 7) |
5320                ENABLE_L1_TLB |
5321                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5322                ENABLE_ADVANCED_DRIVER_MODEL |
5323                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5324         /* Setup L2 cache */
5325         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5326                ENABLE_L2_FRAGMENT_PROCESSING |
5327                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5328                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5329                EFFECTIVE_L2_QUEUE_SIZE(7) |
5330                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5331         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5332         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5333                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5334         /* setup context0 */
5335         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5336         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5337         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5338         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5339                         (u32)(rdev->dummy_page.addr >> 12));
5340         WREG32(VM_CONTEXT0_CNTL2, 0);
5341         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5342                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5343
5344         WREG32(0x15D4, 0);
5345         WREG32(0x15D8, 0);
5346         WREG32(0x15DC, 0);
5347
5348         /* empty context1-15 */
5349         /* FIXME start with 4G, once using 2 level pt switch to full
5350          * vm size space
5351          */
5352         /* set vm size, must be a multiple of 4 */
5353         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5354         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5355         for (i = 1; i < 16; i++) {
5356                 if (i < 8)
5357                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5358                                rdev->gart.table_addr >> 12);
5359                 else
5360                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5361                                rdev->gart.table_addr >> 12);
5362         }
5363
5364         /* enable context1-15 */
5365         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5366                (u32)(rdev->dummy_page.addr >> 12));
5367         WREG32(VM_CONTEXT1_CNTL2, 4);
5368         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5369                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5370                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5371                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5372                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5373                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5374                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5375                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5376                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5377                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5378                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5379                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5380                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5381
5382         if (rdev->family == CHIP_KAVERI) {
5383                 u32 tmp = RREG32(CHUB_CONTROL);
5384                 tmp &= ~BYPASS_VM;
5385                 WREG32(CHUB_CONTROL, tmp);
5386         }
5387
5388         /* XXX SH_MEM regs */
5389         /* where to put LDS, scratch, GPUVM in FSA64 space */
5390         mutex_lock(&rdev->srbm_mutex);
5391         for (i = 0; i < 16; i++) {
5392                 cik_srbm_select(rdev, 0, 0, 0, i);
5393                 /* CP and shaders */
5394                 WREG32(SH_MEM_CONFIG, 0);
5395                 WREG32(SH_MEM_APE1_BASE, 1);
5396                 WREG32(SH_MEM_APE1_LIMIT, 0);
5397                 WREG32(SH_MEM_BASES, 0);
5398                 /* SDMA GFX */
5399                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5400                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5401                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5402                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5403                 /* XXX SDMA RLC - todo */
5404         }
5405         cik_srbm_select(rdev, 0, 0, 0, 0);
5406         mutex_unlock(&rdev->srbm_mutex);
5407
5408         cik_pcie_gart_tlb_flush(rdev);
5409         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5410                  (unsigned)(rdev->mc.gtt_size >> 20),
5411                  (unsigned long long)rdev->gart.table_addr);
5412         rdev->gart.ready = true;
5413         return 0;
5414 }
5415
5416 /**
5417  * cik_pcie_gart_disable - gart disable
5418  *
5419  * @rdev: radeon_device pointer
5420  *
5421  * This disables all VM page table (CIK).
5422  */
5423 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5424 {
5425         /* Disable all tables */
5426         WREG32(VM_CONTEXT0_CNTL, 0);
5427         WREG32(VM_CONTEXT1_CNTL, 0);
5428         /* Setup TLB control */
5429         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5430                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5431         /* Setup L2 cache */
5432         WREG32(VM_L2_CNTL,
5433                ENABLE_L2_FRAGMENT_PROCESSING |
5434                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5435                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5436                EFFECTIVE_L2_QUEUE_SIZE(7) |
5437                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5438         WREG32(VM_L2_CNTL2, 0);
5439         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5440                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5441         radeon_gart_table_vram_unpin(rdev);
5442 }
5443
5444 /**
5445  * cik_pcie_gart_fini - vm fini callback
5446  *
5447  * @rdev: radeon_device pointer
5448  *
5449  * Tears down the driver GART/VM setup (CIK).
5450  */
5451 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5452 {
5453         cik_pcie_gart_disable(rdev);
5454         radeon_gart_table_vram_free(rdev);
5455         radeon_gart_fini(rdev);
5456 }
5457
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5471
5472 /*
5473  * vm
5474  * VMID 0 is the physical GPU addresses as used by the kernel.
5475  * VMIDs 1-15 are used for userspace clients and are handled
5476  * by the radeon vm/hsa code.
5477  */
5478 /**
5479  * cik_vm_init - cik vm init callback
5480  *
5481  * @rdev: radeon_device pointer
5482  *
5483  * Inits cik specific vm parameters (number of VMs, base of vram for
5484  * VMIDs 1-15) (CIK).
5485  * Returns 0 for success.
5486  */
5487 int cik_vm_init(struct radeon_device *rdev)
5488 {
5489         /* number of VMs */
5490         rdev->vm_manager.nvm = 16;
5491         /* base offset of vram pages */
5492         if (rdev->flags & RADEON_IS_IGP) {
5493                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5494                 tmp <<= 22;
5495                 rdev->vm_manager.vram_base_offset = tmp;
5496         } else
5497                 rdev->vm_manager.vram_base_offset = 0;
5498
5499         return 0;
5500 }
5501
5502 /**
5503  * cik_vm_fini - cik vm fini callback
5504  *
5505  * @rdev: radeon_device pointer
5506  *
5507  * Tear down any asic specific VM setup (CIK).
5508  */
5509 void cik_vm_fini(struct radeon_device *rdev)
5510 {
5511 }
5512
5513 /**
5514  * cik_vm_decode_fault - print human readable fault info
5515  *
5516  * @rdev: radeon_device pointer
5517  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5518  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5519  *
5520  * Print human readable fault information (CIK).
5521  */
5522 static void cik_vm_decode_fault(struct radeon_device *rdev,
5523                                 u32 status, u32 addr, u32 mc_client)
5524 {
5525         u32 mc_id;
5526         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5527         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5528         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5529                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5530
5531         if (rdev->family == CHIP_HAWAII)
5532                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5533         else
5534                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5535
5536         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5537                protections, vmid, addr,
5538                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5539                block, mc_client, mc_id);
5540 }
5541
5542 /**
5543  * cik_vm_flush - cik vm flush using the CP
5544  *
5545  * @rdev: radeon_device pointer
5546  *
5547  * Update the page table base and flush the VM TLB
5548  * using the CP (CIK).
5549  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the page directory base for this VMID; contexts 0-7 and
	 * 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VMID in SRBM_GFX_CNTL so the SH_MEM writes below
	 * target its register set */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	/* NOTE(review): base (1) > limit (0) below - presumably this
	 * disables the APE1 aperture; confirm against SH_MEM docs */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM_GFX_CNTL back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5614
5615 /*
5616  * RLC
5617  * The RLC is a multi-purpose microengine that handles a
5618  * variety of functions, the most important of which is
5619  * the interrupt controller.
5620  */
5621 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5622                                           bool enable)
5623 {
5624         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5625
5626         if (enable)
5627                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5628         else
5629                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5630         WREG32(CP_INT_CNTL_RING0, tmp);
5631 }
5632
5633 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5634 {
5635         u32 tmp;
5636
5637         tmp = RREG32(RLC_LB_CNTL);
5638         if (enable)
5639                 tmp |= LOAD_BALANCE_ENABLE;
5640         else
5641                 tmp &= ~LOAD_BALANCE_ENABLE;
5642         WREG32(RLC_LB_CNTL, tmp);
5643 }
5644
5645 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5646 {
5647         u32 i, j, k;
5648         u32 mask;
5649
5650         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5651                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5652                         cik_select_se_sh(rdev, i, j);
5653                         for (k = 0; k < rdev->usec_timeout; k++) {
5654                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5655                                         break;
5656                                 udelay(1);
5657                         }
5658                 }
5659         }
5660         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5661
5662         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5663         for (k = 0; k < rdev->usec_timeout; k++) {
5664                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5665                         break;
5666                 udelay(1);
5667         }
5668 }
5669
5670 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5671 {
5672         u32 tmp;
5673
5674         tmp = RREG32(RLC_CNTL);
5675         if (tmp != rlc)
5676                 WREG32(RLC_CNTL, rlc);
5677 }
5678
5679 static u32 cik_halt_rlc(struct radeon_device *rdev)
5680 {
5681         u32 data, orig;
5682
5683         orig = data = RREG32(RLC_CNTL);
5684
5685         if (data & RLC_ENABLE) {
5686                 u32 i;
5687
5688                 data &= ~RLC_ENABLE;
5689                 WREG32(RLC_CNTL, data);
5690
5691                 for (i = 0; i < rdev->usec_timeout; i++) {
5692                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5693                                 break;
5694                         udelay(1);
5695                 }
5696
5697                 cik_wait_for_rlc_serdes(rdev);
5698         }
5699
5700         return orig;
5701 }
5702
5703 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5704 {
5705         u32 tmp, i, mask;
5706
5707         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5708         WREG32(RLC_GPR_REG2, tmp);
5709
5710         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5711         for (i = 0; i < rdev->usec_timeout; i++) {
5712                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5713                         break;
5714                 udelay(1);
5715         }
5716
5717         for (i = 0; i < rdev->usec_timeout; i++) {
5718                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5719                         break;
5720                 udelay(1);
5721         }
5722 }
5723
5724 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5725 {
5726         u32 tmp;
5727
5728         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5729         WREG32(RLC_GPR_REG2, tmp);
5730 }
5731
5732 /**
5733  * cik_rlc_stop - stop the RLC ME
5734  *
5735  * @rdev: radeon_device pointer
5736  *
5737  * Halt the RLC ME (MicroEngine) (CIK).
5738  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clearing RLC_CNTL (including RLC_ENABLE) halts the RLC ME */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for the serdes masters to go idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5747
5748 /**
5749  * cik_rlc_start - start the RLC ME
5750  *
5751  * @rdev: radeon_device pointer
5752  *
5753  * Unhalt the RLC ME (MicroEngine) (CIK).
5754  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* small settle delay after unhalting the RLC */
	udelay(50);
}
5763
5764 /**
5765  * cik_rlc_resume - setup the RLC hw
5766  *
5767  * @rdev: radeon_device pointer
5768  *
5769  * Initialize the RLC registers, load the ucode,
5770  * and start the RLC (CIK).
5771  * Returns 0 for success, -EINVAL if the ucode is not available.
5772  */
5773 static int cik_rlc_resume(struct radeon_device *rdev)
5774 {
5775         u32 i, size, tmp;
5776         const __be32 *fw_data;
5777
5778         if (!rdev->rlc_fw)
5779                 return -EINVAL;
5780
5781         switch (rdev->family) {
5782         case CHIP_BONAIRE:
5783         case CHIP_HAWAII:
5784         default:
5785                 size = BONAIRE_RLC_UCODE_SIZE;
5786                 break;
5787         case CHIP_KAVERI:
5788                 size = KV_RLC_UCODE_SIZE;
5789                 break;
5790         case CHIP_KABINI:
5791                 size = KB_RLC_UCODE_SIZE;
5792                 break;
5793         }
5794
5795         cik_rlc_stop(rdev);
5796
5797         /* disable CG */
5798         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5799         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5800
5801         si_rlc_reset(rdev);
5802
5803         cik_init_pg(rdev);
5804
5805         cik_init_cg(rdev);
5806
5807         WREG32(RLC_LB_CNTR_INIT, 0);
5808         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5809
5810         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5811         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5812         WREG32(RLC_LB_PARAMS, 0x00600408);
5813         WREG32(RLC_LB_CNTL, 0x80000004);
5814
5815         WREG32(RLC_MC_CNTL, 0);
5816         WREG32(RLC_UCODE_CNTL, 0);
5817
5818         fw_data = (const __be32 *)rdev->rlc_fw->data;
5819                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5820         for (i = 0; i < size; i++)
5821                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5822         WREG32(RLC_GPM_UCODE_ADDR, 0);
5823
5824         /* XXX - find out what chips support lbpw */
5825         cik_enable_lbpw(rdev, false);
5826
5827         if (rdev->family == CHIP_BONAIRE)
5828                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5829
5830         cik_rlc_start(rdev);
5831
5832         return 0;
5833 }
5834
/* enable/disable coarse grain clock gating (CGCG) + clock gating light
 * sleep (CGLS) for the gfx block */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while touching the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads - presumably a
		 * flush/settle sequence before disabling CGCG; confirm
		 * against the hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write the register when the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5870
/* enable/disable medium grain clock gating (MGCG) and the related
 * memory light sleep / CGTS features for the gfx block */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while touching the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS shader monitoring */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while touching the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5949
/* MC (memory controller) registers carrying both the MC_CG_ENABLE and
 * MC_LS_ENABLE bits; walked by cik_enable_mc_mgcg()/cik_enable_mc_ls() */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5962
5963 static void cik_enable_mc_ls(struct radeon_device *rdev,
5964                              bool enable)
5965 {
5966         int i;
5967         u32 orig, data;
5968
5969         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5970                 orig = data = RREG32(mc_cg_registers[i]);
5971                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5972                         data |= MC_LS_ENABLE;
5973                 else
5974                         data &= ~MC_LS_ENABLE;
5975                 if (data != orig)
5976                         WREG32(mc_cg_registers[i], data);
5977         }
5978 }
5979
5980 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5981                                bool enable)
5982 {
5983         int i;
5984         u32 orig, data;
5985
5986         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5987                 orig = data = RREG32(mc_cg_registers[i]);
5988                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5989                         data |= MC_CG_ENABLE;
5990                 else
5991                         data &= ~MC_CG_ENABLE;
5992                 if (data != orig)
5993                         WREG32(mc_cg_registers[i], data);
5994         }
5995 }
5996
5997 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5998                                  bool enable)
5999 {
6000         u32 orig, data;
6001
6002         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6003                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6004                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6005         } else {
6006                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6007                 data |= 0xff000000;
6008                 if (data != orig)
6009                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6010
6011                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6012                 data |= 0xff000000;
6013                 if (data != orig)
6014                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6015         }
6016 }
6017
6018 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6019                                  bool enable)
6020 {
6021         u32 orig, data;
6022
6023         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6024                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6025                 data |= 0x100;
6026                 if (orig != data)
6027                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6028
6029                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6030                 data |= 0x100;
6031                 if (orig != data)
6032                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6033         } else {
6034                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6035                 data &= ~0x100;
6036                 if (orig != data)
6037                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6038
6039                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6040                 data &= ~0x100;
6041                 if (orig != data)
6042                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6043         }
6044 }
6045
/* toggle medium grain clock gating for the UVD block */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten; the read may only matter for a side effect
		 * (if any) - confirm whether "data |= 0xfff" was intended */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* turn on dynamic clock mode */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory gating bits */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* turn off dynamic clock mode */
		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6071
6072 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6073                                bool enable)
6074 {
6075         u32 orig, data;
6076
6077         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6078
6079         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6080                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6081                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6082         else
6083                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6084                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6085
6086         if (orig != data)
6087                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6088 }
6089
6090 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6091                                 bool enable)
6092 {
6093         u32 orig, data;
6094
6095         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6096
6097         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6098                 data &= ~CLOCK_GATING_DIS;
6099         else
6100                 data |= CLOCK_GATING_DIS;
6101
6102         if (orig != data)
6103                 WREG32(HDP_HOST_PATH_CNTL, data);
6104 }
6105
6106 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6107                               bool enable)
6108 {
6109         u32 orig, data;
6110
6111         orig = data = RREG32(HDP_MEM_POWER_LS);
6112
6113         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6114                 data |= HDP_LS_ENABLE;
6115         else
6116                 data &= ~HDP_LS_ENABLE;
6117
6118         if (orig != data)
6119                 WREG32(HDP_MEM_POWER_LS, data);
6120 }
6121
/* cik_update_cg - enable/disable clock gating for the requested blocks.
 * @block is a mask of RADEON_CG_BLOCK_* flags; @enable selects direction. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG when enabling... */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* ...and the reverse when disabling */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is only touched on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6169
/* enable clock gating on all supported blocks at init time */
static void cik_init_cg(struct radeon_device *rdev)
{

	/* gfx first (it also manages the gui idle interrupt) */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6184
/* disable clock gating on teardown; reverse order of cik_init_cg() */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	/* gfx last */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6195
6196 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6197                                           bool enable)
6198 {
6199         u32 data, orig;
6200
6201         orig = data = RREG32(RLC_PG_CNTL);
6202         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6203                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6204         else
6205                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6206         if (orig != data)
6207                 WREG32(RLC_PG_CNTL, data);
6208 }
6209
6210 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6211                                           bool enable)
6212 {
6213         u32 data, orig;
6214
6215         orig = data = RREG32(RLC_PG_CNTL);
6216         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6217                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6218         else
6219                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6220         if (orig != data)
6221                 WREG32(RLC_PG_CNTL, data);
6222 }
6223
6224 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6225 {
6226         u32 data, orig;
6227
6228         orig = data = RREG32(RLC_PG_CNTL);
6229         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6230                 data &= ~DISABLE_CP_PG;
6231         else
6232                 data |= DISABLE_CP_PG;
6233         if (orig != data)
6234                 WREG32(RLC_PG_CNTL, data);
6235 }
6236
6237 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6238 {
6239         u32 data, orig;
6240
6241         orig = data = RREG32(RLC_PG_CNTL);
6242         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6243                 data &= ~DISABLE_GDS_PG;
6244         else
6245                 data |= DISABLE_GDS_PG;
6246         if (orig != data)
6247                 WREG32(RLC_PG_CNTL, data);
6248 }
6249
6250 #define CP_ME_TABLE_SIZE    96
6251 #define CP_ME_TABLE_OFFSET  2048
6252 #define CP_MEC_TABLE_OFFSET 4096
6253
6254 void cik_init_cp_pg_table(struct radeon_device *rdev)
6255 {
6256         const __be32 *fw_data;
6257         volatile u32 *dst_ptr;
6258         int me, i, max_me = 4;
6259         u32 bo_offset = 0;
6260         u32 table_offset;
6261
6262         if (rdev->family == CHIP_KAVERI)
6263                 max_me = 5;
6264
6265         if (rdev->rlc.cp_table_ptr == NULL)
6266                 return;
6267
6268         /* write the cp table buffer */
6269         dst_ptr = rdev->rlc.cp_table_ptr;
6270         for (me = 0; me < max_me; me++) {
6271                 if (me == 0) {
6272                         fw_data = (const __be32 *)rdev->ce_fw->data;
6273                         table_offset = CP_ME_TABLE_OFFSET;
6274                 } else if (me == 1) {
6275                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6276                         table_offset = CP_ME_TABLE_OFFSET;
6277                 } else if (me == 2) {
6278                         fw_data = (const __be32 *)rdev->me_fw->data;
6279                         table_offset = CP_ME_TABLE_OFFSET;
6280                 } else {
6281                         fw_data = (const __be32 *)rdev->mec_fw->data;
6282                         table_offset = CP_MEC_TABLE_OFFSET;
6283                 }
6284
6285                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6286                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6287                 }
6288                 bo_offset += CP_ME_TABLE_SIZE;
6289         }
6290 }
6291
/* enable/disable gfx power gating plus the RLC auto power-gate feature */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read value is discarded - presumably the
		 * read itself forces the block out of power gating; confirm
		 * before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6321
6322 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6323 {
6324         u32 mask = 0, tmp, tmp1;
6325         int i;
6326
6327         cik_select_se_sh(rdev, se, sh);
6328         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6329         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6330         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6331
6332         tmp &= 0xffff0000;
6333
6334         tmp |= tmp1;
6335         tmp >>= 16;
6336
6337         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6338                 mask <<= 1;
6339                 mask |= 1;
6340         }
6341
6342         return (~tmp) & mask;
6343 }
6344
6345 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6346 {
6347         u32 i, j, k, active_cu_number = 0;
6348         u32 mask, counter, cu_bitmap;
6349         u32 tmp = 0;
6350
6351         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6352                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6353                         mask = 1;
6354                         cu_bitmap = 0;
6355                         counter = 0;
6356                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6357                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6358                                         if (counter < 2)
6359                                                 cu_bitmap |= mask;
6360                                         counter ++;
6361                                 }
6362                                 mask <<= 1;
6363                         }
6364
6365                         active_cu_number += counter;
6366                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6367                 }
6368         }
6369
6370         WREG32(RLC_PG_AO_CU_MASK, tmp);
6371
6372         tmp = RREG32(RLC_MAX_PG_CU);
6373         tmp &= ~MAX_PU_CU_MASK;
6374         tmp |= MAX_PU_CU(active_cu_number);
6375         WREG32(RLC_MAX_PG_CU, tmp);
6376 }
6377
6378 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6379                                        bool enable)
6380 {
6381         u32 data, orig;
6382
6383         orig = data = RREG32(RLC_PG_CNTL);
6384         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6385                 data |= STATIC_PER_CU_PG_ENABLE;
6386         else
6387                 data &= ~STATIC_PER_CU_PG_ENABLE;
6388         if (orig != data)
6389                 WREG32(RLC_PG_CNTL, data);
6390 }
6391
6392 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6393                                         bool enable)
6394 {
6395         u32 data, orig;
6396
6397         orig = data = RREG32(RLC_PG_CNTL);
6398         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6399                 data |= DYN_PER_CU_PG_ENABLE;
6400         else
6401                 data &= ~DYN_PER_CU_PG_ENABLE;
6402         if (orig != data)
6403                 WREG32(RLC_PG_CNTL, data);
6404 }
6405
6406 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6407 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6408
/* program the RLC scratch memory with the clear-state descriptor and the
 * save/restore register list, then configure the power gating timings */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state descriptor (hi addr, lo addr, size) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state - zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* write the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6457
/**
 * cik_update_gfx_pg - update all gfx powergating paths
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable powergating
 *
 * Toggles coarse-grain, static medium-grain and dynamic medium-grain
 * gfx powergating together.  The static/dynamic helpers gate on the
 * relevant rdev->pg_flags support bits themselves, so calling all
 * three unconditionally is safe.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6464
6465 u32 cik_get_csb_size(struct radeon_device *rdev)
6466 {
6467         u32 count = 0;
6468         const struct cs_section_def *sect = NULL;
6469         const struct cs_extent_def *ext = NULL;
6470
6471         if (rdev->rlc.cs_data == NULL)
6472                 return 0;
6473
6474         /* begin clear state */
6475         count += 2;
6476         /* context control state */
6477         count += 3;
6478
6479         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6480                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6481                         if (sect->id == SECT_CONTEXT)
6482                                 count += 2 + ext->reg_count;
6483                         else
6484                                 return 0;
6485                 }
6486         }
6487         /* pa_sc_raster_config/pa_sc_raster_config1 */
6488         count += 4;
6489         /* end clear state */
6490         count += 2;
6491         /* clear state */
6492         count += 2;
6493
6494         return count;
6495 }
6496
6497 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6498 {
6499         u32 count = 0, i;
6500         const struct cs_section_def *sect = NULL;
6501         const struct cs_extent_def *ext = NULL;
6502
6503         if (rdev->rlc.cs_data == NULL)
6504                 return;
6505         if (buffer == NULL)
6506                 return;
6507
6508         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6509         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6510
6511         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6512         buffer[count++] = cpu_to_le32(0x80000000);
6513         buffer[count++] = cpu_to_le32(0x80000000);
6514
6515         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6516                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6517                         if (sect->id == SECT_CONTEXT) {
6518                                 buffer[count++] =
6519                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6520                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6521                                 for (i = 0; i < ext->reg_count; i++)
6522                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6523                         } else {
6524                                 return;
6525                         }
6526                 }
6527         }
6528
6529         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6530         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6531         switch (rdev->family) {
6532         case CHIP_BONAIRE:
6533                 buffer[count++] = cpu_to_le32(0x16000012);
6534                 buffer[count++] = cpu_to_le32(0x00000000);
6535                 break;
6536         case CHIP_KAVERI:
6537                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6538                 buffer[count++] = cpu_to_le32(0x00000000);
6539                 break;
6540         case CHIP_KABINI:
6541                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6542                 buffer[count++] = cpu_to_le32(0x00000000);
6543                 break;
6544         case CHIP_HAWAII:
6545                 buffer[count++] = 0x3a00161a;
6546                 buffer[count++] = 0x0000002e;
6547                 break;
6548         default:
6549                 buffer[count++] = cpu_to_le32(0x00000000);
6550                 buffer[count++] = cpu_to_le32(0x00000000);
6551                 break;
6552         }
6553
6554         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6555         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6556
6557         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6558         buffer[count++] = cpu_to_le32(0);
6559 }
6560
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any powergating flags are enabled: turn on SCK slowdown for
 * power up and power down, set up CP and GDS powergating when GFX PG
 * is supported (including the gfx coarse-grain PG state), program
 * the always-on CU mask, and finally enable the gfx PG paths.
 * Call order follows the hardware bring-up sequence.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6575
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables the gfx PG paths first, then CP and GDS powergating when
 * GFX PG is supported — the reverse of cik_init_pg().
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6586
6587 /*
6588  * Interrupts
6589  * Starting with r6xx, interrupts are handled via a ring buffer.
6590  * Ring buffers are areas of GPU accessible memory that the GPU
6591  * writes interrupt vectors into and the host reads vectors out of.
6592  * There is a rptr (read pointer) that determines where the
6593  * host is currently reading, and a wptr (write pointer)
6594  * which determines where the GPU has written.  When the
6595  * pointers are equal, the ring is idle.  When the GPU
6596  * writes vectors to the ring buffer, it increments the
6597  * wptr.  When there is an interrupt, the host then starts
6598  * fetching commands and processing them until the pointers are
6599  * equal again at which point it updates the rptr.
6600  */
6601
6602 /**
6603  * cik_enable_interrupts - Enable the interrupt ring buffer
6604  *
6605  * @rdev: radeon_device pointer
6606  *
6607  * Enable the interrupt ring buffer (CIK).
6608  */
6609 static void cik_enable_interrupts(struct radeon_device *rdev)
6610 {
6611         u32 ih_cntl = RREG32(IH_CNTL);
6612         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6613
6614         ih_cntl |= ENABLE_INTR;
6615         ih_rb_cntl |= IH_RB_ENABLE;
6616         WREG32(IH_CNTL, ih_cntl);
6617         WREG32(IH_RB_CNTL, ih_rb_cntl);
6618         rdev->ih.enabled = true;
6619 }
6620
6621 /**
6622  * cik_disable_interrupts - Disable the interrupt ring buffer
6623  *
6624  * @rdev: radeon_device pointer
6625  *
6626  * Disable the interrupt ring buffer (CIK).
6627  */
6628 static void cik_disable_interrupts(struct radeon_device *rdev)
6629 {
6630         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6631         u32 ih_cntl = RREG32(IH_CNTL);
6632
6633         ih_rb_cntl &= ~IH_RB_ENABLE;
6634         ih_cntl &= ~ENABLE_INTR;
6635         WREG32(IH_RB_CNTL, ih_rb_cntl);
6636         WREG32(IH_CNTL, ih_cntl);
6637         /* set rptr, wptr to 0 */
6638         WREG32(IH_RB_RPTR, 0);
6639         WREG32(IH_RB_WPTR, 0);
6640         rdev->ih.enabled = false;
6641         rdev->ih.rptr = 0;
6642 }
6643
6644 /**
6645  * cik_disable_interrupt_state - Disable all interrupt sources
6646  *
6647  * @rdev: radeon_device pointer
6648  *
6649  * Clear all interrupt enable bits used by the driver (CIK).
6650  */
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK):
 * gfx ring, both SDMA engines, all compute pipes, GRBM, per-crtc
 * vblank/vline masks, DAC and digital hotplug.  Bits not owned by
 * the driver (context busy/empty, HPD polarity) are preserved.
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: ME1/ME2, pipes 0-3 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch crtcs the asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve the configured polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6705
6706 /**
6707  * cik_irq_init - init and enable the interrupt ring
6708  *
6709  * @rdev: radeon_device pointer
6710  *
6711  * Allocate a ring buffer for the interrupt controller,
6712  * enable the RLC, disable interrupts, enable the IH
6713  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6715  * Returns 0 for success, errors for failure.
6716  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reconfigure everything */
	cik_disable_interrupts(rdev);

	/* init rlc; free the IH ring again if this fails */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is in units of 256 bytes */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6787
6788 /**
6789  * cik_irq_set - enable/disable interrupt sources
6790  *
6791  * @rdev: radeon_device pointer
6792  *
6793  * Enable interrupt sources on the GPU (vblanks, hpd,
6794  * etc.) (CIK).
6795  * Returns 0 for success, errors for failure.
6796  */
6797 int cik_irq_set(struct radeon_device *rdev)
6798 {
6799         u32 cp_int_cntl;
6800         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6801         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6802         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6803         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6804         u32 grbm_int_cntl = 0;
6805         u32 dma_cntl, dma_cntl1;
6806         u32 thermal_int;
6807
6808         if (!rdev->irq.installed) {
6809                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6810                 return -EINVAL;
6811         }
6812         /* don't enable anything if the ih is disabled */
6813         if (!rdev->ih.enabled) {
6814                 cik_disable_interrupts(rdev);
6815                 /* force the active interrupt state to all disabled */
6816                 cik_disable_interrupt_state(rdev);
6817                 return 0;
6818         }
6819
6820         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6821                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6822         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6823
6824         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6825         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6826         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6827         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6828         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6829         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6830
6831         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6832         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6833
6834         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6835         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6836         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6837         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6838         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6839         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6840         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6841         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6842
6843         if (rdev->flags & RADEON_IS_IGP)
6844                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6845                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6846         else
6847                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6848                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6849
6850         /* enable CP interrupts on all rings */
6851         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6852                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6853                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6854         }
6855         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6856                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6857                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6858                 if (ring->me == 1) {
6859                         switch (ring->pipe) {
6860                         case 0:
6861                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6862                                 break;
6863                         case 1:
6864                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6865                                 break;
6866                         case 2:
6867                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6868                                 break;
6869                         case 3:
6870                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6871                                 break;
6872                         default:
6873                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6874                                 break;
6875                         }
6876                 } else if (ring->me == 2) {
6877                         switch (ring->pipe) {
6878                         case 0:
6879                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6880                                 break;
6881                         case 1:
6882                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6883                                 break;
6884                         case 2:
6885                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6886                                 break;
6887                         case 3:
6888                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6889                                 break;
6890                         default:
6891                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6892                                 break;
6893                         }
6894                 } else {
6895                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6896                 }
6897         }
6898         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6899                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6900                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6901                 if (ring->me == 1) {
6902                         switch (ring->pipe) {
6903                         case 0:
6904                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6905                                 break;
6906                         case 1:
6907                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6908                                 break;
6909                         case 2:
6910                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6911                                 break;
6912                         case 3:
6913                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6914                                 break;
6915                         default:
6916                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6917                                 break;
6918                         }
6919                 } else if (ring->me == 2) {
6920                         switch (ring->pipe) {
6921                         case 0:
6922                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6923                                 break;
6924                         case 1:
6925                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6926                                 break;
6927                         case 2:
6928                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6929                                 break;
6930                         case 3:
6931                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6932                                 break;
6933                         default:
6934                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6935                                 break;
6936                         }
6937                 } else {
6938                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6939                 }
6940         }
6941
6942         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6943                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6944                 dma_cntl |= TRAP_ENABLE;
6945         }
6946
6947         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6948                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6949                 dma_cntl1 |= TRAP_ENABLE;
6950         }
6951
6952         if (rdev->irq.crtc_vblank_int[0] ||
6953             atomic_read(&rdev->irq.pflip[0])) {
6954                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6955                 crtc1 |= VBLANK_INTERRUPT_MASK;
6956         }
6957         if (rdev->irq.crtc_vblank_int[1] ||
6958             atomic_read(&rdev->irq.pflip[1])) {
6959                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6960                 crtc2 |= VBLANK_INTERRUPT_MASK;
6961         }
6962         if (rdev->irq.crtc_vblank_int[2] ||
6963             atomic_read(&rdev->irq.pflip[2])) {
6964                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6965                 crtc3 |= VBLANK_INTERRUPT_MASK;
6966         }
6967         if (rdev->irq.crtc_vblank_int[3] ||
6968             atomic_read(&rdev->irq.pflip[3])) {
6969                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6970                 crtc4 |= VBLANK_INTERRUPT_MASK;
6971         }
6972         if (rdev->irq.crtc_vblank_int[4] ||
6973             atomic_read(&rdev->irq.pflip[4])) {
6974                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6975                 crtc5 |= VBLANK_INTERRUPT_MASK;
6976         }
6977         if (rdev->irq.crtc_vblank_int[5] ||
6978             atomic_read(&rdev->irq.pflip[5])) {
6979                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6980                 crtc6 |= VBLANK_INTERRUPT_MASK;
6981         }
6982         if (rdev->irq.hpd[0]) {
6983                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6984                 hpd1 |= DC_HPDx_INT_EN;
6985         }
6986         if (rdev->irq.hpd[1]) {
6987                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6988                 hpd2 |= DC_HPDx_INT_EN;
6989         }
6990         if (rdev->irq.hpd[2]) {
6991                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6992                 hpd3 |= DC_HPDx_INT_EN;
6993         }
6994         if (rdev->irq.hpd[3]) {
6995                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6996                 hpd4 |= DC_HPDx_INT_EN;
6997         }
6998         if (rdev->irq.hpd[4]) {
6999                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7000                 hpd5 |= DC_HPDx_INT_EN;
7001         }
7002         if (rdev->irq.hpd[5]) {
7003                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7004                 hpd6 |= DC_HPDx_INT_EN;
7005         }
7006
7007         if (rdev->irq.dpm_thermal) {
7008                 DRM_DEBUG("dpm thermal\n");
7009                 if (rdev->flags & RADEON_IS_IGP)
7010                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7011                 else
7012                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7013         }
7014
7015         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7016
7017         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7018         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7019
7020         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7021         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7022         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7023         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7024         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7025         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7026         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7027         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7028
7029         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7030
7031         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7032         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7033         if (rdev->num_crtc >= 4) {
7034                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7035                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7036         }
7037         if (rdev->num_crtc >= 6) {
7038                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7039                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7040         }
7041
7042         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7043         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7044         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7045         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7046         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7047         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7048
7049         if (rdev->flags & RADEON_IS_IGP)
7050                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7051         else
7052                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7053
7054         return 0;
7055 }
7056
7057 /**
7058  * cik_irq_ack - ack interrupt sources
7059  *
7060  * @rdev: radeon_device pointer
7061  *
7062  * Ack interrupt sources on the GPU (vblanks, hpd,
7063  * etc.) (CIK).  Certain interrupts sources are sw
7064  * generated and do not require an explicit ack.
7065  */
7066 static inline void cik_irq_ack(struct radeon_device *rdev)
7067 {
7068         u32 tmp;
7069
7070         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7071         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7072         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7073         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7074         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7075         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7076         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7077
7078         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7079                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7080         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7081                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7082         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7083                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7084         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7085                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7086
7087         if (rdev->num_crtc >= 4) {
7088                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7089                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7090                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7091                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7092                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7093                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7094                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7095                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7096         }
7097
7098         if (rdev->num_crtc >= 6) {
7099                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7100                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7101                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7102                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7103                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7104                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7105                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7106                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7107         }
7108
7109         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7110                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7111                 tmp |= DC_HPDx_INT_ACK;
7112                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7113         }
7114         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7115                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7116                 tmp |= DC_HPDx_INT_ACK;
7117                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7118         }
7119         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7120                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7121                 tmp |= DC_HPDx_INT_ACK;
7122                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7123         }
7124         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7125                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7126                 tmp |= DC_HPDx_INT_ACK;
7127                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7128         }
7129         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7130                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7131                 tmp |= DC_HPDx_INT_ACK;
7132                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7133         }
7134         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7135                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7136                 tmp |= DC_HPDx_INT_ACK;
7137                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7138         }
7139 }
7140
7141 /**
7142  * cik_irq_disable - disable interrupts
7143  *
7144  * @rdev: radeon_device pointer
7145  *
7146  * Disable interrupts on the hw (CIK).
7147  */
7148 static void cik_irq_disable(struct radeon_device *rdev)
7149 {
7150         cik_disable_interrupts(rdev);
7151         /* Wait and acknowledge irq */
7152         mdelay(1);
7153         cik_irq_ack(rdev);
7154         cik_disable_interrupt_state(rdev);
7155 }
7156
7157 /**
7158  * cik_irq_disable - disable interrupts for suspend
7159  *
7160  * @rdev: radeon_device pointer
7161  *
7162  * Disable interrupts and stop the RLC (CIK).
7163  * Used for suspend.
7164  */
7165 static void cik_irq_suspend(struct radeon_device *rdev)
7166 {
7167         cik_irq_disable(rdev);
7168         cik_rlc_stop(rdev);
7169 }
7170
7171 /**
7172  * cik_irq_fini - tear down interrupt support
7173  *
7174  * @rdev: radeon_device pointer
7175  *
7176  * Disable interrupts on the hw and free the IH ring
7177  * buffer (CIK).
7178  * Used for driver unload.
7179  */
7180 static void cik_irq_fini(struct radeon_device *rdev)
7181 {
7182         cik_irq_suspend(rdev);
7183         r600_ih_ring_fini(rdev);
7184 }
7185
7186 /**
7187  * cik_get_ih_wptr - get the IH ring buffer wptr
7188  *
7189  * @rdev: radeon_device pointer
7190  *
7191  * Get the IH ring buffer wptr from either the register
7192  * or the writeback memory buffer (CIK).  Also check for
7193  * ring buffer overflow and deal with it.
7194  * Used by cik_irq_process().
7195  * Returns the value of the wptr.
7196  */
7197 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7198 {
7199         u32 wptr, tmp;
7200
7201         if (rdev->wb.enabled)
7202                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7203         else
7204                 wptr = RREG32(IH_RB_WPTR);
7205
7206         if (wptr & RB_OVERFLOW) {
7207                 /* When a ring buffer overflow happen start parsing interrupt
7208                  * from the last not overwritten vector (wptr + 16). Hopefully
7209                  * this should allow us to catchup.
7210                  */
7211                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7212                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7213                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7214                 tmp = RREG32(IH_RB_CNTL);
7215                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7216                 WREG32(IH_RB_CNTL, tmp);
7217         }
7218         return (wptr & rdev->ih.ptr_mask);
7219 }
7220
7221 /*        CIK IV Ring
7222  * Each IV ring entry is 128 bits:
7223  * [7:0]    - interrupt source id
7224  * [31:8]   - reserved
7225  * [59:32]  - interrupt source data
7226  * [63:60]  - reserved
7227  * [71:64]  - RINGID
7228  *            CP:
7229  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7230  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7231  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7232  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7233  *            PIPE_ID - ME0 0=3D
7234  *                    - ME1&2 compute dispatcher (4 pipes each)
7235  *            SDMA:
7236  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7237  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7238  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7239  * [79:72]  - VMID
7240  * [95:80]  - PASID
7241  * [127:96] - reserved
7242  */
7243 /**
7244  * cik_irq_process - interrupt handler
7245  *
7246  * @rdev: radeon_device pointer
7247  *
7248  * Interrupt hander (CIK).  Walk the IH ring,
7249  * ack interrupts and schedule work to handle
7250  * interrupt events.
7251  * Returns irq process return code.
7252  */
7253 int cik_irq_process(struct radeon_device *rdev)
7254 {
7255         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7256         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7257         u32 wptr;
7258         u32 rptr;
7259         u32 src_id, src_data, ring_id;
7260         u8 me_id, pipe_id, queue_id;
7261         u32 ring_index;
7262         bool queue_hotplug = false;
7263         bool queue_reset = false;
7264         u32 addr, status, mc_client;
7265         bool queue_thermal = false;
7266
7267         if (!rdev->ih.enabled || rdev->shutdown)
7268                 return IRQ_NONE;
7269
7270         wptr = cik_get_ih_wptr(rdev);
7271
7272 restart_ih:
7273         /* is somebody else already processing irqs? */
7274         if (atomic_xchg(&rdev->ih.lock, 1))
7275                 return IRQ_NONE;
7276
7277         rptr = rdev->ih.rptr;
7278         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7279
7280         /* Order reading of wptr vs. reading of IH ring data */
7281         rmb();
7282
7283         /* display interrupts */
7284         cik_irq_ack(rdev);
7285
7286         while (rptr != wptr) {
7287                 /* wptr/rptr are in bytes! */
7288                 ring_index = rptr / 4;
7289                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7290                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7291                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7292
7293                 switch (src_id) {
7294                 case 1: /* D1 vblank/vline */
7295                         switch (src_data) {
7296                         case 0: /* D1 vblank */
7297                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7298                                         if (rdev->irq.crtc_vblank_int[0]) {
7299                                                 drm_handle_vblank(rdev->ddev, 0);
7300                                                 rdev->pm.vblank_sync = true;
7301                                                 wake_up(&rdev->irq.vblank_queue);
7302                                         }
7303                                         if (atomic_read(&rdev->irq.pflip[0]))
7304                                                 radeon_crtc_handle_flip(rdev, 0);
7305                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7306                                         DRM_DEBUG("IH: D1 vblank\n");
7307                                 }
7308                                 break;
7309                         case 1: /* D1 vline */
7310                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7311                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7312                                         DRM_DEBUG("IH: D1 vline\n");
7313                                 }
7314                                 break;
7315                         default:
7316                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7317                                 break;
7318                         }
7319                         break;
7320                 case 2: /* D2 vblank/vline */
7321                         switch (src_data) {
7322                         case 0: /* D2 vblank */
7323                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7324                                         if (rdev->irq.crtc_vblank_int[1]) {
7325                                                 drm_handle_vblank(rdev->ddev, 1);
7326                                                 rdev->pm.vblank_sync = true;
7327                                                 wake_up(&rdev->irq.vblank_queue);
7328                                         }
7329                                         if (atomic_read(&rdev->irq.pflip[1]))
7330                                                 radeon_crtc_handle_flip(rdev, 1);
7331                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7332                                         DRM_DEBUG("IH: D2 vblank\n");
7333                                 }
7334                                 break;
7335                         case 1: /* D2 vline */
7336                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7337                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7338                                         DRM_DEBUG("IH: D2 vline\n");
7339                                 }
7340                                 break;
7341                         default:
7342                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7343                                 break;
7344                         }
7345                         break;
7346                 case 3: /* D3 vblank/vline */
7347                         switch (src_data) {
7348                         case 0: /* D3 vblank */
7349                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7350                                         if (rdev->irq.crtc_vblank_int[2]) {
7351                                                 drm_handle_vblank(rdev->ddev, 2);
7352                                                 rdev->pm.vblank_sync = true;
7353                                                 wake_up(&rdev->irq.vblank_queue);
7354                                         }
7355                                         if (atomic_read(&rdev->irq.pflip[2]))
7356                                                 radeon_crtc_handle_flip(rdev, 2);
7357                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7358                                         DRM_DEBUG("IH: D3 vblank\n");
7359                                 }
7360                                 break;
7361                         case 1: /* D3 vline */
7362                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7363                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7364                                         DRM_DEBUG("IH: D3 vline\n");
7365                                 }
7366                                 break;
7367                         default:
7368                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7369                                 break;
7370                         }
7371                         break;
7372                 case 4: /* D4 vblank/vline */
7373                         switch (src_data) {
7374                         case 0: /* D4 vblank */
7375                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7376                                         if (rdev->irq.crtc_vblank_int[3]) {
7377                                                 drm_handle_vblank(rdev->ddev, 3);
7378                                                 rdev->pm.vblank_sync = true;
7379                                                 wake_up(&rdev->irq.vblank_queue);
7380                                         }
7381                                         if (atomic_read(&rdev->irq.pflip[3]))
7382                                                 radeon_crtc_handle_flip(rdev, 3);
7383                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7384                                         DRM_DEBUG("IH: D4 vblank\n");
7385                                 }
7386                                 break;
7387                         case 1: /* D4 vline */
7388                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7389                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7390                                         DRM_DEBUG("IH: D4 vline\n");
7391                                 }
7392                                 break;
7393                         default:
7394                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7395                                 break;
7396                         }
7397                         break;
7398                 case 5: /* D5 vblank/vline */
7399                         switch (src_data) {
7400                         case 0: /* D5 vblank */
7401                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7402                                         if (rdev->irq.crtc_vblank_int[4]) {
7403                                                 drm_handle_vblank(rdev->ddev, 4);
7404                                                 rdev->pm.vblank_sync = true;
7405                                                 wake_up(&rdev->irq.vblank_queue);
7406                                         }
7407                                         if (atomic_read(&rdev->irq.pflip[4]))
7408                                                 radeon_crtc_handle_flip(rdev, 4);
7409                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7410                                         DRM_DEBUG("IH: D5 vblank\n");
7411                                 }
7412                                 break;
7413                         case 1: /* D5 vline */
7414                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7415                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7416                                         DRM_DEBUG("IH: D5 vline\n");
7417                                 }
7418                                 break;
7419                         default:
7420                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7421                                 break;
7422                         }
7423                         break;
7424                 case 6: /* D6 vblank/vline */
7425                         switch (src_data) {
7426                         case 0: /* D6 vblank */
7427                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7428                                         if (rdev->irq.crtc_vblank_int[5]) {
7429                                                 drm_handle_vblank(rdev->ddev, 5);
7430                                                 rdev->pm.vblank_sync = true;
7431                                                 wake_up(&rdev->irq.vblank_queue);
7432                                         }
7433                                         if (atomic_read(&rdev->irq.pflip[5]))
7434                                                 radeon_crtc_handle_flip(rdev, 5);
7435                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7436                                         DRM_DEBUG("IH: D6 vblank\n");
7437                                 }
7438                                 break;
7439                         case 1: /* D6 vline */
7440                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7441                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7442                                         DRM_DEBUG("IH: D6 vline\n");
7443                                 }
7444                                 break;
7445                         default:
7446                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7447                                 break;
7448                         }
7449                         break;
7450                 case 42: /* HPD hotplug */
7451                         switch (src_data) {
7452                         case 0:
7453                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7454                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7455                                         queue_hotplug = true;
7456                                         DRM_DEBUG("IH: HPD1\n");
7457                                 }
7458                                 break;
7459                         case 1:
7460                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7461                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7462                                         queue_hotplug = true;
7463                                         DRM_DEBUG("IH: HPD2\n");
7464                                 }
7465                                 break;
7466                         case 2:
7467                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7468                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7469                                         queue_hotplug = true;
7470                                         DRM_DEBUG("IH: HPD3\n");
7471                                 }
7472                                 break;
7473                         case 3:
7474                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7475                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7476                                         queue_hotplug = true;
7477                                         DRM_DEBUG("IH: HPD4\n");
7478                                 }
7479                                 break;
7480                         case 4:
7481                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7482                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7483                                         queue_hotplug = true;
7484                                         DRM_DEBUG("IH: HPD5\n");
7485                                 }
7486                                 break;
7487                         case 5:
7488                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7489                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7490                                         queue_hotplug = true;
7491                                         DRM_DEBUG("IH: HPD6\n");
7492                                 }
7493                                 break;
7494                         default:
7495                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7496                                 break;
7497                         }
7498                         break;
7499                 case 124: /* UVD */
7500                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7501                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7502                         break;
7503                 case 146:
7504                 case 147:
7505                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7506                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7507                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7508                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7509                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7510                                 addr);
7511                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7512                                 status);
7513                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7514                         /* reset addr and status */
7515                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7516                         break;
7517                 case 167: /* VCE */
7518                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7519                         switch (src_data) {
7520                         case 0:
7521                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7522                                 break;
7523                         case 1:
7524                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7525                                 break;
7526                         default:
7527                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7528                                 break;
7529                         }
7530                         break;
7531                 case 176: /* GFX RB CP_INT */
7532                 case 177: /* GFX IB CP_INT */
7533                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7534                         break;
7535                 case 181: /* CP EOP event */
7536                         DRM_DEBUG("IH: CP EOP\n");
7537                         /* XXX check the bitfield order! */
7538                         me_id = (ring_id & 0x60) >> 5;
7539                         pipe_id = (ring_id & 0x18) >> 3;
7540                         queue_id = (ring_id & 0x7) >> 0;
7541                         switch (me_id) {
7542                         case 0:
7543                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7544                                 break;
7545                         case 1:
7546                         case 2:
7547                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7548                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7549                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7550                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7551                                 break;
7552                         }
7553                         break;
7554                 case 184: /* CP Privileged reg access */
7555                         DRM_ERROR("Illegal register access in command stream\n");
7556                         /* XXX check the bitfield order! */
7557                         me_id = (ring_id & 0x60) >> 5;
7558                         pipe_id = (ring_id & 0x18) >> 3;
7559                         queue_id = (ring_id & 0x7) >> 0;
7560                         switch (me_id) {
7561                         case 0:
7562                                 /* This results in a full GPU reset, but all we need to do is soft
7563                                  * reset the CP for gfx
7564                                  */
7565                                 queue_reset = true;
7566                                 break;
7567                         case 1:
7568                                 /* XXX compute */
7569                                 queue_reset = true;
7570                                 break;
7571                         case 2:
7572                                 /* XXX compute */
7573                                 queue_reset = true;
7574                                 break;
7575                         }
7576                         break;
7577                 case 185: /* CP Privileged inst */
7578                         DRM_ERROR("Illegal instruction in command stream\n");
7579                         /* XXX check the bitfield order! */
7580                         me_id = (ring_id & 0x60) >> 5;
7581                         pipe_id = (ring_id & 0x18) >> 3;
7582                         queue_id = (ring_id & 0x7) >> 0;
7583                         switch (me_id) {
7584                         case 0:
7585                                 /* This results in a full GPU reset, but all we need to do is soft
7586                                  * reset the CP for gfx
7587                                  */
7588                                 queue_reset = true;
7589                                 break;
7590                         case 1:
7591                                 /* XXX compute */
7592                                 queue_reset = true;
7593                                 break;
7594                         case 2:
7595                                 /* XXX compute */
7596                                 queue_reset = true;
7597                                 break;
7598                         }
7599                         break;
7600                 case 224: /* SDMA trap event */
7601                         /* XXX check the bitfield order! */
7602                         me_id = (ring_id & 0x3) >> 0;
7603                         queue_id = (ring_id & 0xc) >> 2;
7604                         DRM_DEBUG("IH: SDMA trap\n");
7605                         switch (me_id) {
7606                         case 0:
7607                                 switch (queue_id) {
7608                                 case 0:
7609                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7610                                         break;
7611                                 case 1:
7612                                         /* XXX compute */
7613                                         break;
7614                                 case 2:
7615                                         /* XXX compute */
7616                                         break;
7617                                 }
7618                                 break;
7619                         case 1:
7620                                 switch (queue_id) {
7621                                 case 0:
7622                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7623                                         break;
7624                                 case 1:
7625                                         /* XXX compute */
7626                                         break;
7627                                 case 2:
7628                                         /* XXX compute */
7629                                         break;
7630                                 }
7631                                 break;
7632                         }
7633                         break;
7634                 case 230: /* thermal low to high */
7635                         DRM_DEBUG("IH: thermal low to high\n");
7636                         rdev->pm.dpm.thermal.high_to_low = false;
7637                         queue_thermal = true;
7638                         break;
7639                 case 231: /* thermal high to low */
7640                         DRM_DEBUG("IH: thermal high to low\n");
7641                         rdev->pm.dpm.thermal.high_to_low = true;
7642                         queue_thermal = true;
7643                         break;
7644                 case 233: /* GUI IDLE */
7645                         DRM_DEBUG("IH: GUI idle\n");
7646                         break;
7647                 case 241: /* SDMA Privileged inst */
7648                 case 247: /* SDMA Privileged inst */
7649                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7650                         /* XXX check the bitfield order! */
7651                         me_id = (ring_id & 0x3) >> 0;
7652                         queue_id = (ring_id & 0xc) >> 2;
7653                         switch (me_id) {
7654                         case 0:
7655                                 switch (queue_id) {
7656                                 case 0:
7657                                         queue_reset = true;
7658                                         break;
7659                                 case 1:
7660                                         /* XXX compute */
7661                                         queue_reset = true;
7662                                         break;
7663                                 case 2:
7664                                         /* XXX compute */
7665                                         queue_reset = true;
7666                                         break;
7667                                 }
7668                                 break;
7669                         case 1:
7670                                 switch (queue_id) {
7671                                 case 0:
7672                                         queue_reset = true;
7673                                         break;
7674                                 case 1:
7675                                         /* XXX compute */
7676                                         queue_reset = true;
7677                                         break;
7678                                 case 2:
7679                                         /* XXX compute */
7680                                         queue_reset = true;
7681                                         break;
7682                                 }
7683                                 break;
7684                         }
7685                         break;
7686                 default:
7687                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7688                         break;
7689                 }
7690
7691                 /* wptr/rptr are in bytes! */
7692                 rptr += 16;
7693                 rptr &= rdev->ih.ptr_mask;
7694         }
7695         if (queue_hotplug)
7696                 schedule_work(&rdev->hotplug_work);
7697         if (queue_reset)
7698                 schedule_work(&rdev->reset_work);
7699         if (queue_thermal)
7700                 schedule_work(&rdev->pm.dpm.thermal.work);
7701         rdev->ih.rptr = rptr;
7702         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7703         atomic_set(&rdev->ih.lock, 0);
7704
7705         /* make sure wptr hasn't changed while processing */
7706         wptr = cik_get_ih_wptr(rdev);
7707         if (wptr != rptr)
7708                 goto restart_ih;
7709
7710         return IRQ_HANDLED;
7711 }
7712
7713 /*
7714  * startup/shutdown callbacks
7715  */
7716 /**
7717  * cik_startup - program the asic to a functional state
7718  *
7719  * @rdev: radeon_device pointer
7720  *
7721  * Programs the asic to a functional state (CIK).
7722  * Called by cik_init() and cik_resume().
7723  * Returns 0 for success, error for failure.
7724  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC ucode here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore register list for the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring before the rings are brought up */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are non-fatal: the ring is simply disabled below */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE failures are likewise non-fatal: both VCE rings get disabled */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* gfx ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* sdma rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE rings are only initialized if their resume above succeeded
	 * (ring_size stays 0 otherwise) */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT means both VCE rings were disabled; only log other errors */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7956
7957 /**
7958  * cik_resume - resume the asic to a functional state
7959  *
7960  * @rdev: radeon_device pointer
7961  *
7962  * Programs the asic to a functional state (CIK).
7963  * Called at resume.
7964  * Returns 0 for success, error for failure.
7965  */
7966 int cik_resume(struct radeon_device *rdev)
7967 {
7968         int r;
7969
7970         /* post card */
7971         atom_asic_init(rdev->mode_info.atom_context);
7972
7973         /* init golden registers */
7974         cik_init_golden_registers(rdev);
7975
7976         if (rdev->pm.pm_method == PM_METHOD_DPM)
7977                 radeon_pm_resume(rdev);
7978
7979         rdev->accel_working = true;
7980         r = cik_startup(rdev);
7981         if (r) {
7982                 DRM_ERROR("cik startup failed on resume\n");
7983                 rdev->accel_working = false;
7984                 return r;
7985         }
7986
7987         return r;
7988
7989 }
7990
7991 /**
7992  * cik_suspend - suspend the asic
7993  *
7994  * @rdev: radeon_device pointer
7995  *
7996  * Bring the chip into a state suitable for suspend (CIK).
7997  * Called at suspend.
7998  * Returns 0 for success.
7999  */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce the asic: stop the engines, suspend UVD/VCE, then tear
	 * down interrupts, writeback and the GART */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* stop the CP */
	cik_sdma_enable(rdev, false);	/* stop SDMA */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);	/* PG torn down before CG */
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8017
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This
 * should also allow removing a bunch of callback functions
 * like vram_info.
 */
8024 /**
8025  * cik_init - asic specific driver and hw init
8026  *
8027  * @rdev: radeon_device pointer
8028  *
8029  * Setup asic specific driver variables and program the hw
8030  * to a functional state (CIK).
8031  * Called at driver startup.
8032  * Returns 0 for success, errors for failure.
8033  */
8034 int cik_init(struct radeon_device *rdev)
8035 {
8036         struct radeon_ring *ring;
8037         int r;
8038
8039         /* Read BIOS */
8040         if (!radeon_get_bios(rdev)) {
8041                 if (ASIC_IS_AVIVO(rdev))
8042                         return -EINVAL;
8043         }
8044         /* Must be an ATOMBIOS */
8045         if (!rdev->is_atom_bios) {
8046                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8047                 return -EINVAL;
8048         }
8049         r = radeon_atombios_init(rdev);
8050         if (r)
8051                 return r;
8052
8053         /* Post card if necessary */
8054         if (!radeon_card_posted(rdev)) {
8055                 if (!rdev->bios) {
8056                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8057                         return -EINVAL;
8058                 }
8059                 DRM_INFO("GPU not posted. posting now...\n");
8060                 atom_asic_init(rdev->mode_info.atom_context);
8061         }
8062         /* init golden registers */
8063         cik_init_golden_registers(rdev);
8064         /* Initialize scratch registers */
8065         cik_scratch_init(rdev);
8066         /* Initialize surface registers */
8067         radeon_surface_init(rdev);
8068         /* Initialize clocks */
8069         radeon_get_clock_info(rdev->ddev);
8070
8071         /* Fence driver */
8072         r = radeon_fence_driver_init(rdev);
8073         if (r)
8074                 return r;
8075
8076         /* initialize memory controller */
8077         r = cik_mc_init(rdev);
8078         if (r)
8079                 return r;
8080         /* Memory manager */
8081         r = radeon_bo_init(rdev);
8082         if (r)
8083                 return r;
8084
8085         if (rdev->flags & RADEON_IS_IGP) {
8086                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8087                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8088                         r = cik_init_microcode(rdev);
8089                         if (r) {
8090                                 DRM_ERROR("Failed to load firmware!\n");
8091                                 return r;
8092                         }
8093                 }
8094         } else {
8095                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8096                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8097                     !rdev->mc_fw) {
8098                         r = cik_init_microcode(rdev);
8099                         if (r) {
8100                                 DRM_ERROR("Failed to load firmware!\n");
8101                                 return r;
8102                         }
8103                 }
8104         }
8105
8106         /* Initialize power management */
8107         radeon_pm_init(rdev);
8108
8109         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8110         ring->ring_obj = NULL;
8111         r600_ring_init(rdev, ring, 1024 * 1024);
8112
8113         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8114         ring->ring_obj = NULL;
8115         r600_ring_init(rdev, ring, 1024 * 1024);
8116         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8117         if (r)
8118                 return r;
8119
8120         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8121         ring->ring_obj = NULL;
8122         r600_ring_init(rdev, ring, 1024 * 1024);
8123         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8124         if (r)
8125                 return r;
8126
8127         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8128         ring->ring_obj = NULL;
8129         r600_ring_init(rdev, ring, 256 * 1024);
8130
8131         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8132         ring->ring_obj = NULL;
8133         r600_ring_init(rdev, ring, 256 * 1024);
8134
8135         r = radeon_uvd_init(rdev);
8136         if (!r) {
8137                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8138                 ring->ring_obj = NULL;
8139                 r600_ring_init(rdev, ring, 4096);
8140         }
8141
8142         r = radeon_vce_init(rdev);
8143         if (!r) {
8144                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8145                 ring->ring_obj = NULL;
8146                 r600_ring_init(rdev, ring, 4096);
8147
8148                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8149                 ring->ring_obj = NULL;
8150                 r600_ring_init(rdev, ring, 4096);
8151         }
8152
8153         rdev->ih.ring_obj = NULL;
8154         r600_ih_ring_init(rdev, 64 * 1024);
8155
8156         r = r600_pcie_gart_init(rdev);
8157         if (r)
8158                 return r;
8159
8160         rdev->accel_working = true;
8161         r = cik_startup(rdev);
8162         if (r) {
8163                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8164                 cik_cp_fini(rdev);
8165                 cik_sdma_fini(rdev);
8166                 cik_irq_fini(rdev);
8167                 sumo_rlc_fini(rdev);
8168                 cik_mec_fini(rdev);
8169                 radeon_wb_fini(rdev);
8170                 radeon_ib_pool_fini(rdev);
8171                 radeon_vm_manager_fini(rdev);
8172                 radeon_irq_kms_fini(rdev);
8173                 cik_pcie_gart_fini(rdev);
8174                 rdev->accel_working = false;
8175         }
8176
8177         /* Don't start up if the MC ucode is missing.
8178          * The default clocks and voltages before the MC ucode
8179          * is loaded are not suffient for advanced operations.
8180          */
8181         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8182                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8183                 return -EINVAL;
8184         }
8185
8186         return 0;
8187 }
8188
8189 /**
8190  * cik_fini - asic specific driver and hw fini
8191  *
8192  * @rdev: radeon_device pointer
8193  *
8194  * Tear down the asic specific driver variables and program the hw
8195  * to an idle state (CIK).
8196  * Called at driver unload.
8197  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);	/* PG torn down before CG, as in cik_suspend() */
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8224
8225 void dce8_program_fmt(struct drm_encoder *encoder)
8226 {
8227         struct drm_device *dev = encoder->dev;
8228         struct radeon_device *rdev = dev->dev_private;
8229         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8230         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8231         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8232         int bpc = 0;
8233         u32 tmp = 0;
8234         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8235
8236         if (connector) {
8237                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8238                 bpc = radeon_get_monitor_bpc(connector);
8239                 dither = radeon_connector->dither;
8240         }
8241
8242         /* LVDS/eDP FMT is set up by atom */
8243         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8244                 return;
8245
8246         /* not needed for analog */
8247         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8248             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8249                 return;
8250
8251         if (bpc == 0)
8252                 return;
8253
8254         switch (bpc) {
8255         case 6:
8256                 if (dither == RADEON_FMT_DITHER_ENABLE)
8257                         /* XXX sort out optimal dither settings */
8258                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8259                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8260                 else
8261                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8262                 break;
8263         case 8:
8264                 if (dither == RADEON_FMT_DITHER_ENABLE)
8265                         /* XXX sort out optimal dither settings */
8266                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8267                                 FMT_RGB_RANDOM_ENABLE |
8268                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8269                 else
8270                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8271                 break;
8272         case 10:
8273                 if (dither == RADEON_FMT_DITHER_ENABLE)
8274                         /* XXX sort out optimal dither settings */
8275                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8276                                 FMT_RGB_RANDOM_ENABLE |
8277                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8278                 else
8279                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8280                 break;
8281         default:
8282                 /* not needed */
8283                 break;
8284         }
8285
8286         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8287 }
8288
8289 /* display watermark setup */
8290 /**
8291  * dce8_line_buffer_adjust - Set up the line buffer
8292  *
8293  * @rdev: radeon_device pointer
8294  * @radeon_crtc: the selected display controller
8295  * @mode: the current display mode on the selected display
8296  * controller
8297  *
8298  * Setup up the line buffer allocation for
8299  * the selected display controller (CIK).
8300  * Returns the line buffer size in pixels.
8301  */
8302 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8303                                    struct radeon_crtc *radeon_crtc,
8304                                    struct drm_display_mode *mode)
8305 {
8306         u32 tmp, buffer_alloc, i;
8307         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8308         /*
8309          * Line Buffer Setup
8310          * There are 6 line buffers, one for each display controllers.
8311          * There are 3 partitions per LB. Select the number of partitions
8312          * to enable based on the display width.  For display widths larger
8313          * than 4096, you need use to use 2 display controllers and combine
8314          * them using the stereo blender.
8315          */
8316         if (radeon_crtc->base.enabled && mode) {
8317                 if (mode->crtc_hdisplay < 1920) {
8318                         tmp = 1;
8319                         buffer_alloc = 2;
8320                 } else if (mode->crtc_hdisplay < 2560) {
8321                         tmp = 2;
8322                         buffer_alloc = 2;
8323                 } else if (mode->crtc_hdisplay < 4096) {
8324                         tmp = 0;
8325                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8326                 } else {
8327                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8328                         tmp = 0;
8329                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8330                 }
8331         } else {
8332                 tmp = 1;
8333                 buffer_alloc = 0;
8334         }
8335
8336         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8337                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8338
8339         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8340                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8341         for (i = 0; i < rdev->usec_timeout; i++) {
8342                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8343                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8344                         break;
8345                 udelay(1);
8346         }
8347
8348         if (radeon_crtc->base.enabled && mode) {
8349                 switch (tmp) {
8350                 case 0:
8351                 default:
8352                         return 4096 * 2;
8353                 case 1:
8354                         return 1920 * 2;
8355                 case 2:
8356                         return 2560 * 2;
8357                 }
8358         }
8359
8360         /* controller not enabled, so no lb used */
8361         return 0;
8362 }
8363
8364 /**
8365  * cik_get_number_of_dram_channels - get the number of dram channels
8366  *
8367  * @rdev: radeon_device pointer
8368  *
8369  * Look up the number of video ram channels (CIK).
8370  * Used for display watermark bandwidth calculations
8371  * Returns the number of dram channels
8372  */
8373 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8374 {
8375         u32 tmp = RREG32(MC_SHARED_CHMAP);
8376
8377         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8378         case 0:
8379         default:
8380                 return 1;
8381         case 1:
8382                 return 2;
8383         case 2:
8384                 return 4;
8385         case 3:
8386                 return 8;
8387         case 4:
8388                 return 3;
8389         case 5:
8390                 return 6;
8391         case 6:
8392                 return 10;
8393         case 7:
8394                 return 12;
8395         case 8:
8396                 return 16;
8397         }
8398 }
8399
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8415
8416 /**
8417  * dce8_dram_bandwidth - get the dram bandwidth
8418  *
8419  * @wm: watermark calculation data
8420  *
8421  * Calculate the raw dram bandwidth (CIK).
8422  * Used for display watermark bandwidth calculations
8423  * Returns the dram bandwidth in MBytes/s
8424  */
8425 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8426 {
8427         /* Calculate raw DRAM Bandwidth */
8428         fixed20_12 dram_efficiency; /* 0.7 */
8429         fixed20_12 yclk, dram_channels, bandwidth;
8430         fixed20_12 a;
8431
8432         a.full = dfixed_const(1000);
8433         yclk.full = dfixed_const(wm->yclk);
8434         yclk.full = dfixed_div(yclk, a);
8435         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8436         a.full = dfixed_const(10);
8437         dram_efficiency.full = dfixed_const(7);
8438         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8439         bandwidth.full = dfixed_mul(dram_channels, yclk);
8440         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8441
8442         return dfixed_trunc(bandwidth);
8443 }
8444
8445 /**
8446  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8447  *
8448  * @wm: watermark calculation data
8449  *
8450  * Calculate the dram bandwidth used for display (CIK).
8451  * Used for display watermark bandwidth calculations
8452  * Returns the dram bandwidth for display in MBytes/s
8453  */
8454 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8455 {
8456         /* Calculate DRAM Bandwidth and the part allocated to display. */
8457         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8458         fixed20_12 yclk, dram_channels, bandwidth;
8459         fixed20_12 a;
8460
8461         a.full = dfixed_const(1000);
8462         yclk.full = dfixed_const(wm->yclk);
8463         yclk.full = dfixed_div(yclk, a);
8464         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8465         a.full = dfixed_const(10);
8466         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8467         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8468         bandwidth.full = dfixed_mul(dram_channels, yclk);
8469         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8470
8471         return dfixed_trunc(bandwidth);
8472 }
8473
8474 /**
8475  * dce8_data_return_bandwidth - get the data return bandwidth
8476  *
8477  * @wm: watermark calculation data
8478  *
8479  * Calculate the data return bandwidth used for display (CIK).
8480  * Used for display watermark bandwidth calculations
8481  * Returns the data return bandwidth in MBytes/s
8482  */
8483 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8484 {
8485         /* Calculate the display Data return Bandwidth */
8486         fixed20_12 return_efficiency; /* 0.8 */
8487         fixed20_12 sclk, bandwidth;
8488         fixed20_12 a;
8489
8490         a.full = dfixed_const(1000);
8491         sclk.full = dfixed_const(wm->sclk);
8492         sclk.full = dfixed_div(sclk, a);
8493         a.full = dfixed_const(10);
8494         return_efficiency.full = dfixed_const(8);
8495         return_efficiency.full = dfixed_div(return_efficiency, a);
8496         a.full = dfixed_const(32);
8497         bandwidth.full = dfixed_mul(a, sclk);
8498         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8499
8500         return dfixed_trunc(bandwidth);
8501 }
8502
8503 /**
8504  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8505  *
8506  * @wm: watermark calculation data
8507  *
8508  * Calculate the dmif bandwidth used for display (CIK).
8509  * Used for display watermark bandwidth calculations
8510  * Returns the dmif bandwidth in MBytes/s
8511  */
8512 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8513 {
8514         /* Calculate the DMIF Request Bandwidth */
8515         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8516         fixed20_12 disp_clk, bandwidth;
8517         fixed20_12 a, b;
8518
8519         a.full = dfixed_const(1000);
8520         disp_clk.full = dfixed_const(wm->disp_clk);
8521         disp_clk.full = dfixed_div(disp_clk, a);
8522         a.full = dfixed_const(32);
8523         b.full = dfixed_mul(a, disp_clk);
8524
8525         a.full = dfixed_const(10);
8526         disp_clk_request_efficiency.full = dfixed_const(8);
8527         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8528
8529         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8530
8531         return dfixed_trunc(bandwidth);
8532 }
8533
8534 /**
8535  * dce8_available_bandwidth - get the min available bandwidth
8536  *
8537  * @wm: watermark calculation data
8538  *
8539  * Calculate the min available bandwidth used for display (CIK).
8540  * Used for display watermark bandwidth calculations
8541  * Returns the min available bandwidth in MBytes/s
8542  */
8543 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8544 {
8545         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8546         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8547         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8548         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8549
8550         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8551 }
8552
8553 /**
8554  * dce8_average_bandwidth - get the average available bandwidth
8555  *
8556  * @wm: watermark calculation data
8557  *
8558  * Calculate the average available bandwidth used for display (CIK).
8559  * Used for display watermark bandwidth calculations
8560  * Returns the average available bandwidth in MBytes/s
8561  */
8562 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8563 {
8564         /* Calculate the display mode Average Bandwidth
8565          * DisplayMode should contain the source and destination dimensions,
8566          * timing, etc.
8567          */
8568         fixed20_12 bpp;
8569         fixed20_12 line_time;
8570         fixed20_12 src_width;
8571         fixed20_12 bandwidth;
8572         fixed20_12 a;
8573
8574         a.full = dfixed_const(1000);
8575         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8576         line_time.full = dfixed_div(line_time, a);
8577         bpp.full = dfixed_const(wm->bytes_per_pixel);
8578         src_width.full = dfixed_const(wm->src_width);
8579         bandwidth.full = dfixed_mul(src_width, bpp);
8580         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8581         bandwidth.full = dfixed_div(bandwidth, line_time);
8582
8583         return dfixed_trunc(bandwidth);
8584 }
8585
8586 /**
8587  * dce8_latency_watermark - get the latency watermark
8588  *
8589  * @wm: watermark calculation data
8590  *
8591  * Calculate the latency watermark (CIK).
8592  * Used for display watermark bandwidth calculations
8593  * Returns the latency watermark in ns
8594  */
8595 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8596 {
8597         /* First calculate the latency in ns */
8598         u32 mc_latency = 2000; /* 2000 ns. */
8599         u32 available_bandwidth = dce8_available_bandwidth(wm);
8600         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8601         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8602         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8603         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8604                 (wm->num_heads * cursor_line_pair_return_time);
8605         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8606         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8607         u32 tmp, dmif_size = 12288;
8608         fixed20_12 a, b, c;
8609
8610         if (wm->num_heads == 0)
8611                 return 0;
8612
8613         a.full = dfixed_const(2);
8614         b.full = dfixed_const(1);
8615         if ((wm->vsc.full > a.full) ||
8616             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8617             (wm->vtaps >= 5) ||
8618             ((wm->vsc.full >= a.full) && wm->interlaced))
8619                 max_src_lines_per_dst_line = 4;
8620         else
8621                 max_src_lines_per_dst_line = 2;
8622
8623         a.full = dfixed_const(available_bandwidth);
8624         b.full = dfixed_const(wm->num_heads);
8625         a.full = dfixed_div(a, b);
8626
8627         b.full = dfixed_const(mc_latency + 512);
8628         c.full = dfixed_const(wm->disp_clk);
8629         b.full = dfixed_div(b, c);
8630
8631         c.full = dfixed_const(dmif_size);
8632         b.full = dfixed_div(c, b);
8633
8634         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8635
8636         b.full = dfixed_const(1000);
8637         c.full = dfixed_const(wm->disp_clk);
8638         b.full = dfixed_div(c, b);
8639         c.full = dfixed_const(wm->bytes_per_pixel);
8640         b.full = dfixed_mul(b, c);
8641
8642         lb_fill_bw = min(tmp, dfixed_trunc(b));
8643
8644         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8645         b.full = dfixed_const(1000);
8646         c.full = dfixed_const(lb_fill_bw);
8647         b.full = dfixed_div(c, b);
8648         a.full = dfixed_div(a, b);
8649         line_fill_time = dfixed_trunc(a);
8650
8651         if (line_fill_time < wm->active_time)
8652                 return latency;
8653         else
8654                 return latency + (line_fill_time - wm->active_time);
8655
8656 }
8657
8658 /**
8659  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8660  * average and available dram bandwidth
8661  *
8662  * @wm: watermark calculation data
8663  *
8664  * Check if the display average bandwidth fits in the display
8665  * dram bandwidth (CIK).
8666  * Used for display watermark bandwidth calculations
8667  * Returns true if the display fits, false if not.
8668  */
8669 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8670 {
8671         if (dce8_average_bandwidth(wm) <=
8672             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8673                 return true;
8674         else
8675                 return false;
8676 }
8677
8678 /**
8679  * dce8_average_bandwidth_vs_available_bandwidth - check
8680  * average and available bandwidth
8681  *
8682  * @wm: watermark calculation data
8683  *
8684  * Check if the display average bandwidth fits in the display
8685  * available bandwidth (CIK).
8686  * Used for display watermark bandwidth calculations
8687  * Returns true if the display fits, false if not.
8688  */
8689 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8690 {
8691         if (dce8_average_bandwidth(wm) <=
8692             (dce8_available_bandwidth(wm) / wm->num_heads))
8693                 return true;
8694         else
8695                 return false;
8696 }
8697
8698 /**
8699  * dce8_check_latency_hiding - check latency hiding
8700  *
8701  * @wm: watermark calculation data
8702  *
8703  * Check latency hiding (CIK).
8704  * Used for display watermark bandwidth calculations
8705  * Returns true if the display fits, false if not.
8706  */
8707 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8708 {
8709         u32 lb_partitions = wm->lb_size / wm->src_width;
8710         u32 line_time = wm->active_time + wm->blank_time;
8711         u32 latency_tolerant_lines;
8712         u32 latency_hiding;
8713         fixed20_12 a;
8714
8715         a.full = dfixed_const(1);
8716         if (wm->vsc.full > a.full)
8717                 latency_tolerant_lines = 1;
8718         else {
8719                 if (lb_partitions <= (wm->vtaps + 1))
8720                         latency_tolerant_lines = 1;
8721                 else
8722                         latency_tolerant_lines = 2;
8723         }
8724
8725         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8726
8727         if (dce8_latency_watermark(wm) <= latency_hiding)
8728                 return true;
8729         else
8730                 return false;
8731 }
8732
8733 /**
8734  * dce8_program_watermarks - program display watermarks
8735  *
8736  * @rdev: radeon_device pointer
8737  * @radeon_crtc: the selected display controller
8738  * @lb_size: line buffer size
8739  * @num_heads: number of display controllers in use
8740  *
8741  * Calculate and program the display watermarks for the
8742  * selected display controller (CIK).
8743  */
8744 static void dce8_program_watermarks(struct radeon_device *rdev,
8745                                     struct radeon_crtc *radeon_crtc,
8746                                     u32 lb_size, u32 num_heads)
8747 {
8748         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8749         struct dce8_wm_params wm_low, wm_high;
8750         u32 pixel_period;
8751         u32 line_time = 0;
8752         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8753         u32 tmp, wm_mask;
8754
8755         if (radeon_crtc->base.enabled && num_heads && mode) {
8756                 pixel_period = 1000000 / (u32)mode->clock;
8757                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8758
8759                 /* watermark for high clocks */
8760                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8761                     rdev->pm.dpm_enabled) {
8762                         wm_high.yclk =
8763                                 radeon_dpm_get_mclk(rdev, false) * 10;
8764                         wm_high.sclk =
8765                                 radeon_dpm_get_sclk(rdev, false) * 10;
8766                 } else {
8767                         wm_high.yclk = rdev->pm.current_mclk * 10;
8768                         wm_high.sclk = rdev->pm.current_sclk * 10;
8769                 }
8770
8771                 wm_high.disp_clk = mode->clock;
8772                 wm_high.src_width = mode->crtc_hdisplay;
8773                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8774                 wm_high.blank_time = line_time - wm_high.active_time;
8775                 wm_high.interlaced = false;
8776                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8777                         wm_high.interlaced = true;
8778                 wm_high.vsc = radeon_crtc->vsc;
8779                 wm_high.vtaps = 1;
8780                 if (radeon_crtc->rmx_type != RMX_OFF)
8781                         wm_high.vtaps = 2;
8782                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8783                 wm_high.lb_size = lb_size;
8784                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8785                 wm_high.num_heads = num_heads;
8786
8787                 /* set for high clocks */
8788                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8789
8790                 /* possibly force display priority to high */
8791                 /* should really do this at mode validation time... */
8792                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8793                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8794                     !dce8_check_latency_hiding(&wm_high) ||
8795                     (rdev->disp_priority == 2)) {
8796                         DRM_DEBUG_KMS("force priority to high\n");
8797                 }
8798
8799                 /* watermark for low clocks */
8800                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8801                     rdev->pm.dpm_enabled) {
8802                         wm_low.yclk =
8803                                 radeon_dpm_get_mclk(rdev, true) * 10;
8804                         wm_low.sclk =
8805                                 radeon_dpm_get_sclk(rdev, true) * 10;
8806                 } else {
8807                         wm_low.yclk = rdev->pm.current_mclk * 10;
8808                         wm_low.sclk = rdev->pm.current_sclk * 10;
8809                 }
8810
8811                 wm_low.disp_clk = mode->clock;
8812                 wm_low.src_width = mode->crtc_hdisplay;
8813                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8814                 wm_low.blank_time = line_time - wm_low.active_time;
8815                 wm_low.interlaced = false;
8816                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8817                         wm_low.interlaced = true;
8818                 wm_low.vsc = radeon_crtc->vsc;
8819                 wm_low.vtaps = 1;
8820                 if (radeon_crtc->rmx_type != RMX_OFF)
8821                         wm_low.vtaps = 2;
8822                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8823                 wm_low.lb_size = lb_size;
8824                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8825                 wm_low.num_heads = num_heads;
8826
8827                 /* set for low clocks */
8828                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8829
8830                 /* possibly force display priority to high */
8831                 /* should really do this at mode validation time... */
8832                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8833                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8834                     !dce8_check_latency_hiding(&wm_low) ||
8835                     (rdev->disp_priority == 2)) {
8836                         DRM_DEBUG_KMS("force priority to high\n");
8837                 }
8838         }
8839
8840         /* select wm A */
8841         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8842         tmp = wm_mask;
8843         tmp &= ~LATENCY_WATERMARK_MASK(3);
8844         tmp |= LATENCY_WATERMARK_MASK(1);
8845         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8846         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8847                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8848                 LATENCY_HIGH_WATERMARK(line_time)));
8849         /* select wm B */
8850         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8851         tmp &= ~LATENCY_WATERMARK_MASK(3);
8852         tmp |= LATENCY_WATERMARK_MASK(2);
8853         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8854         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8855                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8856                 LATENCY_HIGH_WATERMARK(line_time)));
8857         /* restore original selection */
8858         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8859
8860         /* save values for DPM */
8861         radeon_crtc->line_time = line_time;
8862         radeon_crtc->wm_high = latency_watermark_a;
8863         radeon_crtc->wm_low = latency_watermark_b;
8864 }
8865
8866 /**
8867  * dce8_bandwidth_update - program display watermarks
8868  *
8869  * @rdev: radeon_device pointer
8870  *
8871  * Calculate and program the display watermarks and line
8872  * buffer allocation (CIK).
8873  */
8874 void dce8_bandwidth_update(struct radeon_device *rdev)
8875 {
8876         struct drm_display_mode *mode = NULL;
8877         u32 num_heads = 0, lb_size;
8878         int i;
8879
8880         radeon_update_display_priority(rdev);
8881
8882         for (i = 0; i < rdev->num_crtc; i++) {
8883                 if (rdev->mode_info.crtcs[i]->base.enabled)
8884                         num_heads++;
8885         }
8886         for (i = 0; i < rdev->num_crtc; i++) {
8887                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8888                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8889                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8890         }
8891 }
8892
8893 /**
8894  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8895  *
8896  * @rdev: radeon_device pointer
8897  *
8898  * Fetches a GPU clock counter snapshot (SI).
8899  * Returns the 64 bit clock counter snapshot.
8900  */
8901 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8902 {
8903         uint64_t clock;
8904
8905         mutex_lock(&rdev->gpu_clock_mutex);
8906         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8907         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8908                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8909         mutex_unlock(&rdev->gpu_clock_mutex);
8910         return clock;
8911 }
8912
8913 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8914                               u32 cntl_reg, u32 status_reg)
8915 {
8916         int r, i;
8917         struct atom_clock_dividers dividers;
8918         uint32_t tmp;
8919
8920         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8921                                            clock, false, &dividers);
8922         if (r)
8923                 return r;
8924
8925         tmp = RREG32_SMC(cntl_reg);
8926         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8927         tmp |= dividers.post_divider;
8928         WREG32_SMC(cntl_reg, tmp);
8929
8930         for (i = 0; i < 100; i++) {
8931                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8932                         break;
8933                 mdelay(10);
8934         }
8935         if (i == 100)
8936                 return -ETIMEDOUT;
8937
8938         return 0;
8939 }
8940
8941 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8942 {
8943         int r = 0;
8944
8945         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8946         if (r)
8947                 return r;
8948
8949         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8950         return r;
8951 }
8952
8953 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
8954 {
8955         int r, i;
8956         struct atom_clock_dividers dividers;
8957         u32 tmp;
8958
8959         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8960                                            ecclk, false, &dividers);
8961         if (r)
8962                 return r;
8963
8964         for (i = 0; i < 100; i++) {
8965                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8966                         break;
8967                 mdelay(10);
8968         }
8969         if (i == 100)
8970                 return -ETIMEDOUT;
8971
8972         tmp = RREG32_SMC(CG_ECLK_CNTL);
8973         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
8974         tmp |= dividers.post_divider;
8975         WREG32_SMC(CG_ECLK_CNTL, tmp);
8976
8977         for (i = 0; i < 100; i++) {
8978                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8979                         break;
8980                 mdelay(10);
8981         }
8982         if (i == 100)
8983                 return -ETIMEDOUT;
8984
8985         return 0;
8986 }
8987
/* Try to bring the PCIe link up to gen2/gen3 speeds.  The retraining
 * sequence (equalization retry, quiesce toggling, LNKCTL/LNKCTL2
 * save-restore) is order-dependent and mirrors the procedure used by
 * the other radeon ASICs; do not reorder the register accesses.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* honor the radeon.pcie_gen2=0 module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIe link */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 (2.5GT/s) is available */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current link rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability of the upstream bridge and the GPU;
	 * both are needed for the LNKCTL/LNKCTL2 handshake below */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the link control settings on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* enable hardware autonomous width disable during
			 * the retraining */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width if
			 * the link currently runs narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-save current link control state */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the Enter Compliance bit
				 * (1 << 4) and compliance preset (7 << 9)
				 * fields saved above */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit; falls through
	 * after usec_timeout without reporting failure */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9144
/* Program PCIe ASPM (L0s/L1 link power states) and the related PLL
 * power-down and clock-request settings.  Every register update uses a
 * read-modify-write with a write-only-if-changed pattern to avoid
 * redundant PCIe port writes.  The disable_* flags encode the policy
 * for this ASIC; all are currently hardwired to "enabled".
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* honor the radeon.aspm=0 module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set the L0s/L1 inactivity timers; LC_PMI_TO_L1_DIS is cleared
	 * again below when L1 is left enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down while in L1 on
			 * both lane bundles (PB0/PB1, each with two regs) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				/* NOTE(review): root is not NULL-checked
				 * before the config read below; presumably
				 * a PCIe GPU always has an upstream
				 * bridge here — confirm for root-complex
				 * integrated endpoints */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ is only usable if the upstream
				 * bridge advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor clocks off the
				 * reference clock so it can be gated */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the LC_CNTL value built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the BIF memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the N_FTS field reads back saturated and the link is
		 * reversed in both directions, drop the L0s inactivity
		 * timer back to 0 (disables L0s entry) */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}