]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/radeon/cik.c
Merge remote-tracking branch 'hid/for-next'
[karo-tx-linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/*
 * Firmware images requested at runtime for the CIK-family ASICs handled
 * here (Bonaire, Kaveri, Kabini).  Listing them with MODULE_FIRMWARE()
 * lets userspace tooling know to bundle them with the module.
 * Note: KAVERI/KABINI (APUs) have no _mc or _smc image, unlike BONAIRE.
 */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
/* Helpers shared with the r600/evergreen/si/sumo code, defined elsewhere. */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* CIK SDMA engine entry points, defined in the companion cik_sdma file. */
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
                                 struct radeon_ib *ib,
                                 uint64_t pe,
                                 uint64_t addr, unsigned count,
                                 uint32_t incr, uint32_t flags);
/* Forward declarations for file-local helpers used before their definitions. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);
84
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
87 {
88         u32 temp;
89         int actual_temp = 0;
90
91         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
92                 CTF_TEMP_SHIFT;
93
94         if (temp & 0x200)
95                 actual_temp = 255;
96         else
97                 actual_temp = temp & 0x1ff;
98
99         actual_temp = actual_temp * 1000;
100
101         return actual_temp;
102 }
103
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
106 {
107         u32 temp;
108         int actual_temp = 0;
109
110         temp = RREG32_SMC(0xC0300E0C);
111
112         if (temp)
113                 actual_temp = (temp / 8) - 49;
114         else
115                 actual_temp = 0;
116
117         actual_temp = actual_temp * 1000;
118
119         return actual_temp;
120 }
121
122 /*
123  * Indirect registers accessor
124  */
/*
 * cik_pciep_rreg - read a PCIE port indirect register
 *
 * Selects @reg through the PCIE_INDEX/PCIE_DATA window and returns the
 * value read from PCIE_DATA.  The spinlock serializes the index/data
 * pair against concurrent users of the same window; irqsave because the
 * accessor may be called from interrupt context.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read: flush the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
137
/*
 * cik_pciep_wreg - write a PCIE port indirect register
 *
 * Selects @reg through the PCIE_INDEX/PCIE_DATA window and writes @v to
 * it.  Posting reads after both the index and the data write flush each
 * write before the next access; the spinlock serializes the sequence
 * against other users of the window.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* posting read: flush the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* posting read: flush the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
149
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * NOTE(review): the layout appears to be pairs of
 *   ((GRBM_GFX_INDEX selector) << 16) | (register byte offset >> 2)
 * followed by a 0x00000000 slot for the saved value, with the bare
 * literals (0x3, 0x5) marking list segments -- confirm against the RLC
 * microcode documentation before relying on this description.
 * Hardware-derived data: do not edit values by hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
596
/*
 * RLC save/restore register list for Kalindi (Kabini) parts.
 *
 * NOTE(review): same apparent layout as the Spectre list above --
 * ((GRBM_GFX_INDEX selector) << 16) | (register byte offset >> 2)
 * pairs each followed by a 0x00000000 save slot, with bare 0x3/0x5
 * literals marking list segments; confirm against RLC microcode docs.
 * Hardware-derived data: do not edit values by hand.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
921
/* Bonaire SPM golden settings; rows of {offset, mask, value} consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
926
/* Bonaire common golden settings; {offset, mask, value} triplets applied
 * after the chip-specific golden registers in cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
934
/* Bonaire (dGPU) golden register settings; {offset, mask, value} triplets
 * programmed via radeon_program_register_sequence().  Values come from the
 * hardware team's recommended defaults (register names not visible here).
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
979
/* Bonaire medium-grain / coarse-grain clockgating init; {offset, mask, value}
 * triplets programmed first in cik_init_golden_registers(), before the other
 * golden tables.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* repeating 0x3c020..0x3c0a8 ladder below appears to be per-instance
         * clockgating ramp values — pattern repeats every five registers */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1065
/* Spectre (Kaveri) SPM golden settings; {offset, mask, value} triplets. */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1070
/* Spectre (Kaveri) common golden settings; {offset, mask, value} triplets. */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1078
1079 static const u32 spectre_golden_registers[] =
1080 {
1081         0x3c000, 0xffff1fff, 0x96940200,
1082         0x3c00c, 0xffff0001, 0xff000000,
1083         0x3c200, 0xfffc0fff, 0x00000100,
1084         0x6ed8, 0x00010101, 0x00010000,
1085         0x9834, 0xf00fffff, 0x00000400,
1086         0x9838, 0xfffffffc, 0x00020200,
1087         0x5bb0, 0x000000f0, 0x00000070,
1088         0x5bc0, 0xf0311fff, 0x80300000,
1089         0x98f8, 0x73773777, 0x12010001,
1090         0x9b7c, 0x00ff0000, 0x00fc0000,
1091         0x2f48, 0x73773777, 0x12010001,
1092         0x8a14, 0xf000003f, 0x00000007,
1093         0x8b24, 0xffffffff, 0x00ffffff,
1094         0x28350, 0x3f3f3fff, 0x00000082,
1095         0x28355, 0x0000003f, 0x00000000,
1096         0x3e78, 0x00000001, 0x00000002,
1097         0x913c, 0xffff03df, 0x00000004,
1098         0xc768, 0x00000008, 0x00000008,
1099         0x8c00, 0x000008ff, 0x00000800,
1100         0x9508, 0x00010000, 0x00010000,
1101         0xac0c, 0xffffffff, 0x54763210,
1102         0x214f8, 0x01ff01ff, 0x00000002,
1103         0x21498, 0x007ff800, 0x00200000,
1104         0x2015c, 0xffffffff, 0x00000f40,
1105         0x30934, 0xffffffff, 0x00000001
1106 };
1107
/* Spectre (Kaveri) medium-grain / coarse-grain clockgating init;
 * {offset, mask, value} triplets, programmed first for CHIP_KAVERI.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* 0x3c020..0x3c0bc: same five-value ramp pattern as the Bonaire
         * table, but with three extra instances (through 0x3c0bc) */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1198
/* Kalindi (Kabini) SPM golden settings; {offset, mask, value} triplets. */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1203
/* Kalindi (Kabini) common golden settings; {offset, mask, value} triplets. */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1211
/* Kalindi (Kabini APU) golden register settings; {offset, mask, value}
 * triplets programmed via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1245
/* Kalindi (Kabini) medium-grain / coarse-grain clockgating init;
 * {offset, mask, value} triplets, programmed first for CHIP_KABINI.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* shorter 0x3c020..0x3c044 ramp than the dGPU tables (two instances) */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1304
1305 static void cik_init_golden_registers(struct radeon_device *rdev)
1306 {
1307         switch (rdev->family) {
1308         case CHIP_BONAIRE:
1309                 radeon_program_register_sequence(rdev,
1310                                                  bonaire_mgcg_cgcg_init,
1311                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1312                 radeon_program_register_sequence(rdev,
1313                                                  bonaire_golden_registers,
1314                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1315                 radeon_program_register_sequence(rdev,
1316                                                  bonaire_golden_common_registers,
1317                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1318                 radeon_program_register_sequence(rdev,
1319                                                  bonaire_golden_spm_registers,
1320                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1321                 break;
1322         case CHIP_KABINI:
1323                 radeon_program_register_sequence(rdev,
1324                                                  kalindi_mgcg_cgcg_init,
1325                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1326                 radeon_program_register_sequence(rdev,
1327                                                  kalindi_golden_registers,
1328                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1329                 radeon_program_register_sequence(rdev,
1330                                                  kalindi_golden_common_registers,
1331                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1332                 radeon_program_register_sequence(rdev,
1333                                                  kalindi_golden_spm_registers,
1334                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1335                 break;
1336         case CHIP_KAVERI:
1337                 radeon_program_register_sequence(rdev,
1338                                                  spectre_mgcg_cgcg_init,
1339                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1340                 radeon_program_register_sequence(rdev,
1341                                                  spectre_golden_registers,
1342                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1343                 radeon_program_register_sequence(rdev,
1344                                                  spectre_golden_common_registers,
1345                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1346                 radeon_program_register_sequence(rdev,
1347                                                  spectre_golden_spm_registers,
1348                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1349                 break;
1350         default:
1351                 break;
1352         }
1353 }
1354
1355 /**
1356  * cik_get_xclk - get the xclk
1357  *
1358  * @rdev: radeon_device pointer
1359  *
1360  * Returns the reference clock used by the gfx engine
1361  * (CIK).
1362  */
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1364 {
1365         u32 reference_clock = rdev->clock.spll.reference_freq;
1366
1367         if (rdev->flags & RADEON_IS_IGP) {
1368                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369                         return reference_clock / 2;
1370         } else {
1371                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372                         return reference_clock / 4;
1373         }
1374         return reference_clock;
1375 }
1376
1377 /**
1378  * cik_mm_rdoorbell - read a doorbell dword
1379  *
1380  * @rdev: radeon_device pointer
1381  * @offset: byte offset into the aperture
1382  *
1383  * Returns the value in the doorbell aperture at the
1384  * requested offset (CIK).
1385  */
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1387 {
1388         if (offset < rdev->doorbell.size) {
1389                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1390         } else {
1391                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1392                 return 0;
1393         }
1394 }
1395
1396 /**
1397  * cik_mm_wdoorbell - write a doorbell dword
1398  *
1399  * @rdev: radeon_device pointer
1400  * @offset: byte offset into the aperture
1401  * @v: value to write
1402  *
1403  * Writes @v to the doorbell aperture at the
1404  * requested offset (CIK).
1405  */
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1407 {
1408         if (offset < rdev->doorbell.size) {
1409                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1410         } else {
1411                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1412         }
1413 }
1414
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written in order by ci_mc_load_microcode()
 * before uploading the MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};
1456
1457 /**
1458  * cik_srbm_select - select specific register instances
1459  *
1460  * @rdev: radeon_device pointer
1461  * @me: selected ME (micro engine)
1462  * @pipe: pipe
1463  * @queue: queue
1464  * @vmid: VMID
1465  *
1466  * Switches the currently active registers instances.  Some
1467  * registers are instanced per VMID, others are instanced per
1468  * me/pipe/queue combination.
1469  */
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471                             u32 me, u32 pipe, u32 queue, u32 vmid)
1472 {
1473         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1474                              MEID(me & 0x3) |
1475                              VMID(vmid & 0xf) |
1476                              QUEUEID(queue & 0x7));
1477         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1478 }
1479
1480 /* ucode loading */
1481 /**
1482  * ci_mc_load_microcode - load MC ucode into the hw
1483  *
1484  * @rdev: radeon_device pointer
1485  *
1486  * Load the GDDR MC ucode into the hw (CIK).
1487  * Returns 0 on success, error on failure.
1488  */
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1490 {
1491         const __be32 *fw_data;
1492         u32 running, blackout = 0;
1493         u32 *io_mc_regs;
1494         int i, ucode_size, regs_size;
1495
1496         if (!rdev->mc_fw)
1497                 return -EINVAL;
1498
1499         switch (rdev->family) {
1500         case CHIP_BONAIRE:
1501         default:
1502                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503                 ucode_size = CIK_MC_UCODE_SIZE;
1504                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1505                 break;
1506         }
1507
1508         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1509
1510         if (running == 0) {
1511                 if (running) {
1512                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1513                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1514                 }
1515
1516                 /* reset the engine and set to writable */
1517                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1519
1520                 /* load mc io regs */
1521                 for (i = 0; i < regs_size; i++) {
1522                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1524                 }
1525                 /* load the MC ucode */
1526                 fw_data = (const __be32 *)rdev->mc_fw->data;
1527                 for (i = 0; i < ucode_size; i++)
1528                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1529
1530                 /* put the engine back into the active state */
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1534
1535                 /* wait for training to complete */
1536                 for (i = 0; i < rdev->usec_timeout; i++) {
1537                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1538                                 break;
1539                         udelay(1);
1540                 }
1541                 for (i = 0; i < rdev->usec_timeout; i++) {
1542                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1543                                 break;
1544                         udelay(1);
1545                 }
1546
1547                 if (running)
1548                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1549         }
1550
1551         return 0;
1552 }
1553
1554 /**
1555  * cik_init_microcode - load ucode images from disk
1556  *
1557  * @rdev: radeon_device pointer
1558  *
1559  * Use the firmware interface to load the ucode images into
1560  * the driver (not loaded into hw).
1561  * Returns 0 on success, error on failure.
1562  */
1563 static int cik_init_microcode(struct radeon_device *rdev)
1564 {
1565         const char *chip_name;
1566         size_t pfp_req_size, me_req_size, ce_req_size,
1567                 mec_req_size, rlc_req_size, mc_req_size,
1568                 sdma_req_size, smc_req_size;
1569         char fw_name[30];
1570         int err;
1571
1572         DRM_DEBUG("\n");
1573
1574         switch (rdev->family) {
1575         case CHIP_BONAIRE:
1576                 chip_name = "BONAIRE";
1577                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1585                 break;
1586         case CHIP_KAVERI:
1587                 chip_name = "KAVERI";
1588                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1590                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1594                 break;
1595         case CHIP_KABINI:
1596                 chip_name = "KABINI";
1597                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1599                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1603                 break;
1604         default: BUG();
1605         }
1606
1607         DRM_INFO("Loading %s Microcode\n", chip_name);
1608
1609         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1611         if (err)
1612                 goto out;
1613         if (rdev->pfp_fw->size != pfp_req_size) {
1614                 printk(KERN_ERR
1615                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616                        rdev->pfp_fw->size, fw_name);
1617                 err = -EINVAL;
1618                 goto out;
1619         }
1620
1621         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1623         if (err)
1624                 goto out;
1625         if (rdev->me_fw->size != me_req_size) {
1626                 printk(KERN_ERR
1627                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628                        rdev->me_fw->size, fw_name);
1629                 err = -EINVAL;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->ce_fw->size != ce_req_size) {
1637                 printk(KERN_ERR
1638                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->ce_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->mec_fw->size != mec_req_size) {
1648                 printk(KERN_ERR
1649                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->mec_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->rlc_fw->size != rlc_req_size) {
1659                 printk(KERN_ERR
1660                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->rlc_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1667         if (err)
1668                 goto out;
1669         if (rdev->sdma_fw->size != sdma_req_size) {
1670                 printk(KERN_ERR
1671                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672                        rdev->sdma_fw->size, fw_name);
1673                 err = -EINVAL;
1674         }
1675
1676         /* No SMC, MC ucode on APUs */
1677         if (!(rdev->flags & RADEON_IS_IGP)) {
1678                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1680                 if (err)
1681                         goto out;
1682                 if (rdev->mc_fw->size != mc_req_size) {
1683                         printk(KERN_ERR
1684                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685                                rdev->mc_fw->size, fw_name);
1686                         err = -EINVAL;
1687                 }
1688
1689                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1691                 if (err) {
1692                         printk(KERN_ERR
1693                                "smc: error loading firmware \"%s\"\n",
1694                                fw_name);
1695                         release_firmware(rdev->smc_fw);
1696                         rdev->smc_fw = NULL;
1697                         err = 0;
1698                 } else if (rdev->smc_fw->size != smc_req_size) {
1699                         printk(KERN_ERR
1700                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1701                                rdev->smc_fw->size, fw_name);
1702                         err = -EINVAL;
1703                 }
1704         }
1705
1706 out:
1707         if (err) {
1708                 if (err != -EINVAL)
1709                         printk(KERN_ERR
1710                                "cik_cp: Failed to load firmware \"%s\"\n",
1711                                fw_name);
1712                 release_firmware(rdev->pfp_fw);
1713                 rdev->pfp_fw = NULL;
1714                 release_firmware(rdev->me_fw);
1715                 rdev->me_fw = NULL;
1716                 release_firmware(rdev->ce_fw);
1717                 rdev->ce_fw = NULL;
1718                 release_firmware(rdev->rlc_fw);
1719                 rdev->rlc_fw = NULL;
1720                 release_firmware(rdev->mc_fw);
1721                 rdev->mc_fw = NULL;
1722                 release_firmware(rdev->smc_fw);
1723                 rdev->smc_fw = NULL;
1724         }
1725         return err;
1726 }
1727
1728 /*
1729  * Core functions
1730  */
1731 /**
1732  * cik_tiling_mode_table_init - init the hw tiling table
1733  *
1734  * @rdev: radeon_device pointer
1735  *
1736  * Starting with SI, the tiling setup is done globally in a
1737  * set of 32 tiling modes.  Rather than selecting each set of
1738  * parameters per surface as on older asics, we just select
1739  * which index in the tiling table we want to use, and the
1740  * surface uses those parameters (CIK).  A second table of 16
1741  * secondary (macrotile) modes is programmed the same way.
1742  */
1742 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1743 {
1744         const u32 num_tile_mode_states = 32;
1745         const u32 num_secondary_tile_mode_states = 16;
1746         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1747         u32 num_pipe_configs;
1748         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1749                 rdev->config.cik.max_shader_engines;
1750
1751         switch (rdev->config.cik.mem_row_size_in_kb) {
1752         case 1:
1753                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754                 break;
1755         case 2:
1756         default:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758                 break;
1759         case 4:
1760                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761                 break;
1762         }
1763
1764         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1765         if (num_pipe_configs > 8)
1766                 num_pipe_configs = 8; /* ??? */
1767
1768         if (num_pipe_configs == 8) {
1769                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1770                         switch (reg_offset) {
1771                         case 0:
1772                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1776                                 break;
1777                         case 1:
1778                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1782                                 break;
1783                         case 2:
1784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1788                                 break;
1789                         case 3:
1790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1792                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1794                                 break;
1795                         case 4:
1796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1798                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1799                                                  TILE_SPLIT(split_equal_to_row_size));
1800                                 break;
1801                         case 5:
1802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1804                                 break;
1805                         case 6:
1806                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1807                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1808                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1809                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1810                                 break;
1811                         case 7:
1812                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1813                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1814                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1815                                                  TILE_SPLIT(split_equal_to_row_size));
1816                                 break;
1817                         case 8:
1818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1819                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1820                                 break;
1821                         case 9:
1822                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1823                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1824                                 break;
1825                         case 10:
1826                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1827                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1830                                 break;
1831                         case 11:
1832                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1833                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1834                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1835                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1836                                 break;
1837                         case 12:
1838                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1839                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1840                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1841                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1842                                 break;
1843                         case 13:
1844                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1845                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1846                                 break;
1847                         case 14:
1848                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1849                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1852                                 break;
1853                         case 16:
1854                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1855                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1856                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1857                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1858                                 break;
1859                         case 17:
1860                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1861                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1862                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1863                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1864                                 break;
1865                         case 27:
1866                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1868                                 break;
1869                         case 28:
1870                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1871                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1874                                 break;
1875                         case 29:
1876                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1877                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1878                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1879                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1880                                 break;
1881                         case 30:
1882                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1883                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1884                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1885                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1886                                 break;
1887                         default:
1888                                 gb_tile_moden = 0;
1889                                 break;
1890                         }
1891                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1892                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1893                 }
1894                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1895                         switch (reg_offset) {
1896                         case 0:
1897                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1899                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1900                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1901                                 break;
1902                         case 1:
1903                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1905                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1907                                 break;
1908                         case 2:
1909                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1912                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1913                                 break;
1914                         case 3:
1915                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1918                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1919                                 break;
1920                         case 4:
1921                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1925                                 break;
1926                         case 5:
1927                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1929                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1930                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1931                                 break;
1932                         case 6:
1933                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1935                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1936                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1937                                 break;
1938                         case 8:
1939                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1941                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1942                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1943                                 break;
1944                         case 9:
1945                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1947                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1948                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1949                                 break;
1950                         case 10:
1951                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1953                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1954                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1955                                 break;
1956                         case 11:
1957                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1960                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1961                                 break;
1962                         case 12:
1963                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1967                                 break;
1968                         case 13:
1969                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1971                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1972                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1973                                 break;
1974                         case 14:
1975                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1977                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1978                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1979                                 break;
1980                         default:
1981                                 gb_tile_moden = 0;
1982                                 break;
1983                         }
1984                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1985                 }
1986         } else if (num_pipe_configs == 4) {
1987                 if (num_rbs == 4) {
1988                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1989                                 switch (reg_offset) {
1990                                 case 0:
1991                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1995                                         break;
1996                                 case 1:
1997                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2001                                         break;
2002                                 case 2:
2003                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2007                                         break;
2008                                 case 3:
2009                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2011                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2012                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2013                                         break;
2014                                 case 4:
2015                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2016                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2017                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2018                                                          TILE_SPLIT(split_equal_to_row_size));
2019                                         break;
2020                                 case 5:
2021                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2023                                         break;
2024                                 case 6:
2025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2026                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2029                                         break;
2030                                 case 7:
2031                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2032                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2033                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2034                                                          TILE_SPLIT(split_equal_to_row_size));
2035                                         break;
2036                                 case 8:
2037                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2039                                         break;
2040                                 case 9:
2041                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2042                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2043                                         break;
2044                                 case 10:
2045                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049                                         break;
2050                                 case 11:
2051                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2052                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2053                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2054                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055                                         break;
2056                                 case 12:
2057                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2058                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2060                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061                                         break;
2062                                 case 13:
2063                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2065                                         break;
2066                                 case 14:
2067                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                         break;
2072                                 case 16:
2073                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2074                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2076                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                                         break;
2078                                 case 17:
2079                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2080                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2081                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083                                         break;
2084                                 case 27:
2085                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2087                                         break;
2088                                 case 28:
2089                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                                         break;
2094                                 case 29:
2095                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2096                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2097                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2098                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099                                         break;
2100                                 case 30:
2101                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2102                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2103                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2104                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2105                                         break;
2106                                 default:
2107                                         gb_tile_moden = 0;
2108                                         break;
2109                                 }
2110                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2111                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2112                         }
2113                 } else if (num_rbs < 4) {
2114                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2115                                 switch (reg_offset) {
2116                                 case 0:
2117                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2121                                         break;
2122                                 case 1:
2123                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2127                                         break;
2128                                 case 2:
2129                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2133                                         break;
2134                                 case 3:
2135                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2137                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2138                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2139                                         break;
2140                                 case 4:
2141                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2143                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2144                                                          TILE_SPLIT(split_equal_to_row_size));
2145                                         break;
2146                                 case 5:
2147                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2149                                         break;
2150                                 case 6:
2151                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2152                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2153                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2155                                         break;
2156                                 case 7:
2157                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2158                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2159                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160                                                          TILE_SPLIT(split_equal_to_row_size));
2161                                         break;
2162                                 case 8:
2163                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2165                                         break;
2166                                 case 9:
2167                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2169                                         break;
2170                                 case 10:
2171                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175                                         break;
2176                                 case 11:
2177                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2179                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2180                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                                         break;
2182                                 case 12:
2183                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2184                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2186                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187                                         break;
2188                                 case 13:
2189                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2191                                         break;
2192                                 case 14:
2193                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197                                         break;
2198                                 case 16:
2199                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2200                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2202                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203                                         break;
2204                                 case 17:
2205                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2206                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2208                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209                                         break;
2210                                 case 27:
2211                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2212                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2213                                         break;
2214                                 case 28:
2215                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219                                         break;
2220                                 case 29:
2221                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2222                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2223                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2224                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2225                                         break;
2226                                 case 30:
2227                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2228                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2230                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2231                                         break;
2232                                 default:
2233                                         gb_tile_moden = 0;
2234                                         break;
2235                                 }
2236                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2237                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2238                         }
2239                 }
2240                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2241                         switch (reg_offset) {
2242                         case 0:
2243                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2247                                 break;
2248                         case 1:
2249                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2253                                 break;
2254                         case 2:
2255                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2259                                 break;
2260                         case 3:
2261                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                                 break;
2266                         case 4:
2267                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2271                                 break;
2272                         case 5:
2273                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2277                                 break;
2278                         case 6:
2279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2283                                 break;
2284                         case 8:
2285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2289                                 break;
2290                         case 9:
2291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                                 break;
2296                         case 10:
2297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                                 break;
2302                         case 11:
2303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2307                                 break;
2308                         case 12:
2309                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2313                                 break;
2314                         case 13:
2315                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2319                                 break;
2320                         case 14:
2321                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2324                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2325                                 break;
2326                         default:
2327                                 gb_tile_moden = 0;
2328                                 break;
2329                         }
2330                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2331                 }
2332         } else if (num_pipe_configs == 2) {
2333                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2334                         switch (reg_offset) {
2335                         case 0:
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2340                                 break;
2341                         case 1:
2342                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2345                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2346                                 break;
2347                         case 2:
2348                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2352                                 break;
2353                         case 3:
2354                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2355                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2356                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2357                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2358                                 break;
2359                         case 4:
2360                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2362                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2363                                                  TILE_SPLIT(split_equal_to_row_size));
2364                                 break;
2365                         case 5:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2368                                 break;
2369                         case 6:
2370                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2371                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2373                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2374                                 break;
2375                         case 7:
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2377                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2378                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2379                                                  TILE_SPLIT(split_equal_to_row_size));
2380                                 break;
2381                         case 8:
2382                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2383                                 break;
2384                         case 9:
2385                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2386                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2387                                 break;
2388                         case 10:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2392                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393                                 break;
2394                         case 11:
2395                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2397                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2398                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399                                 break;
2400                         case 12:
2401                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2404                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405                                 break;
2406                         case 13:
2407                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2408                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2409                                 break;
2410                         case 14:
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2414                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                                 break;
2416                         case 16:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 17:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2426                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                                 break;
2428                         case 27:
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2431                                 break;
2432                         case 28:
2433                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2436                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                                 break;
2438                         case 29:
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2442                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                                 break;
2444                         case 30:
2445                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2446                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2448                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449                                 break;
2450                         default:
2451                                 gb_tile_moden = 0;
2452                                 break;
2453                         }
2454                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2455                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2456                 }
2457                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2458                         switch (reg_offset) {
2459                         case 0:
2460                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2461                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2464                                 break;
2465                         case 1:
2466                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2470                                 break;
2471                         case 2:
2472                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2476                                 break;
2477                         case 3:
2478                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2482                                 break;
2483                         case 4:
2484                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2488                                 break;
2489                         case 5:
2490                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2494                                 break;
2495                         case 6:
2496                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2500                                 break;
2501                         case 8:
2502                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2503                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2504                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2506                                 break;
2507                         case 9:
2508                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2512                                 break;
2513                         case 10:
2514                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2515                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2516                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2518                                 break;
2519                         case 11:
2520                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                                 break;
2525                         case 12:
2526                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2530                                 break;
2531                         case 13:
2532                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2536                                 break;
2537                         case 14:
2538                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2542                                 break;
2543                         default:
2544                                 gb_tile_moden = 0;
2545                                 break;
2546                         }
2547                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2548                 }
2549         } else
2550                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2551 }
2552
2553 /**
2554  * cik_select_se_sh - select which SE, SH to address
2555  *
2556  * @rdev: radeon_device pointer
2557  * @se_num: shader engine to address
2558  * @sh_num: sh block to address
2559  *
2560  * Select which SE, SH combinations to address. Certain
2561  * registers are instanced per SE or SH.  0xffffffff means
2562  * broadcast to all SEs or SHs (CIK).
2563  */
2564 static void cik_select_se_sh(struct radeon_device *rdev,
2565                              u32 se_num, u32 sh_num)
2566 {
2567         u32 data = INSTANCE_BROADCAST_WRITES;
2568
2569         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2570                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2571         else if (se_num == 0xffffffff)
2572                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2573         else if (sh_num == 0xffffffff)
2574                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2575         else
2576                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2577         WREG32(GRBM_GFX_INDEX, data);
2578 }
2579
2580 /**
2581  * cik_create_bitmask - create a bitmask
2582  *
2583  * @bit_width: length of the mask
2584  *
2585  * create a variable length bit mask (CIK).
2586  * Returns the bitmask.
2587  */
2588 static u32 cik_create_bitmask(u32 bit_width)
2589 {
2590         u32 i, mask = 0;
2591
2592         for (i = 0; i < bit_width; i++) {
2593                 mask <<= 1;
2594                 mask |= 1;
2595         }
2596         return mask;
2597 }
2598
2599 /**
2600  * cik_select_se_sh - select which SE, SH to address
2601  *
2602  * @rdev: radeon_device pointer
2603  * @max_rb_num: max RBs (render backends) for the asic
2604  * @se_num: number of SEs (shader engines) for the asic
2605  * @sh_per_se: number of SH blocks per SE for the asic
2606  *
2607  * Calculates the bitmask of disabled RBs (CIK).
2608  * Returns the disabled RB bitmask.
2609  */
2610 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2611                               u32 max_rb_num, u32 se_num,
2612                               u32 sh_per_se)
2613 {
2614         u32 data, mask;
2615
2616         data = RREG32(CC_RB_BACKEND_DISABLE);
2617         if (data & 1)
2618                 data &= BACKEND_DISABLE_MASK;
2619         else
2620                 data = 0;
2621         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2622
2623         data >>= BACKEND_DISABLE_SHIFT;
2624
2625         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2626
2627         return data & mask;
2628 }
2629
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect each SE/SH's disabled-RB bits into one packed mask,
	 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SH.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so subsequent writes hit all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the packed mask: a set bit now means the RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program PA_SC_RASTER_CONFIG per SE, consuming two enabled-RB
	 * bits per SH to pick the RB map for that slot.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast for the rest of init */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2687
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader engine / pipe / cache limits and FIFO sizes. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU and backend counts; key off the
		 * PCI device id.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable frame buffer access from the BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the memory row size (in kb, capped at 4) from the MC
	 * column count
	 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that does its own
	 * address calculations
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* NOTE(review): the third argument is max_backends_per_se (a
	 * per-SE count) while cik_setup_rb's parameter is named
	 * max_rb_num, and cik_get_rb_disabled divides it by
	 * se_num/sh_per_se again — confirm the intended total-RB value.
	 */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* size the scan-converter FIFOs from the per-family config */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no changed bits: a write-back to latch
	 * the current value
	 */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the register writes settle */
	udelay(50);
}
2934
2935 /*
2936  * GPU scratch registers helpers function.
2937  */
2938 /**
2939  * cik_scratch_init - setup driver info for CP scratch regs
2940  *
2941  * @rdev: radeon_device pointer
2942  *
2943  * Set up the number and offset of the CP scratch registers.
2944  * NOTE: use of CP scratch registers is a legacy inferface and
2945  * is not used by default on newer asics (r6xx+).  On newer asics,
2946  * memory buffers are used for fences rather than scratch regs.
2947  */
2948 static void cik_scratch_init(struct radeon_device *rdev)
2949 {
2950         int i;
2951
2952         rdev->scratch.num_reg = 7;
2953         rdev->scratch.reg_base = SCRATCH_REG0;
2954         for (i = 0; i < rdev->scratch.num_reg; i++) {
2955                 rdev->scratch.free[i] = true;
2956                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2957         }
2958 }
2959
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell whether the CP write landed */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll until the value shows up or we run out of time */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3011
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* fence address must be dword aligned; low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3048
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* fence address must be dword aligned; low bits are dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3086
/**
 * cik_semaphore_ring_emit - emit a semaphore packet on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a semaphore wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that signals or waits on the
 * semaphore's GPU address (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* the upper address bits share the last dword with the select bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3099
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be once this IB completes so
		 * lockup detection can see forward progress
		 */
		if (ring->rptr_save_reg) {
			/* +3 for the SET_UCONFIG_REG packet, +4 for the IB packet */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* +5 for the WRITE_DATA packet, +4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			/* NOTE(review): & 0xffffffff is a no-op on the high
			 * dword — other emitters mask with 0xffff; confirm.
			 */
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it should run under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3158
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell whether the IB ran */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* the IB body: a single packet writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence, then poll the scratch reg */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3224
3225 /*
3226  * CP.
 * On CIK, gfx and compute now have independent command processors.
3228  *
3229  * GFX
3230  * Gfx consists of a single ring and can process both gfx jobs and
3231  * compute jobs.  The gfx CP consists of three microengines (ME):
3232  * PFP - Pre-Fetch Parser
3233  * ME - Micro Engine
3234  * CE - Constant Engine
3235  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3237  * used by the DE so that they can be loaded into cache in parallel
3238  * while the DE is processing state update packets.
3239  *
3240  * Compute
3241  * The compute CP consists of two microengines (ME):
3242  * MEC1 - Compute MicroEngine 1
3243  * MEC2 - Compute MicroEngine 2
3244  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3245  * The queues are exposed to userspace and are programmed directly
3246  * by the compute runtime.
3247  */
3248 /**
3249  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3250  *
3251  * @rdev: radeon_device pointer
3252  * @enable: enable or disable the MEs
3253  *
3254  * Halts or unhalts the gfx MEs.
3255  */
3256 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3257 {
3258         if (enable)
3259                 WREG32(CP_ME_CNTL, 0);
3260         else {
3261                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3262                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3263         }
3264         udelay(50);
3265 }
3266
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all the ucode RAM address pointers at zero */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3312
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 = fixed dwords emitted below (4+2+3+2+2+4) on top of the
	 * cik_default_state clear-state dump
	 */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden clear state (from clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3373
3374 /**
3375  * cik_cp_gfx_fini - stop the gfx ring
3376  *
3377  * @rdev: radeon_device pointer
3378  *
3379  * Stop the gfx ring and tear down the driver ring
3380  * info.
3381  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx CP before freeing the ring buffer it reads from */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3387
3388 /**
3389  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3390  *
3391  * @rdev: radeon_device pointer
3392  *
3393  * Program the location and size of the gfx ring buffer
3394  * and test it to make sure it's working.
3395  * Returns 0 for success, error for failure.
3396  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set transiently so the wptr write takes */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback, don't let the CP update the rptr shadow */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3460
3461 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3462                               struct radeon_ring *ring)
3463 {
3464         u32 rptr;
3465
3466
3467
3468         if (rdev->wb.enabled) {
3469                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3470         } else {
3471                 mutex_lock(&rdev->srbm_mutex);
3472                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3473                 rptr = RREG32(CP_HQD_PQ_RPTR);
3474                 cik_srbm_select(rdev, 0, 0, 0, 0);
3475                 mutex_unlock(&rdev->srbm_mutex);
3476         }
3477
3478         return rptr;
3479 }
3480
3481 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3482                               struct radeon_ring *ring)
3483 {
3484         u32 wptr;
3485
3486         if (rdev->wb.enabled) {
3487                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3488         } else {
3489                 mutex_lock(&rdev->srbm_mutex);
3490                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3491                 wptr = RREG32(CP_HQD_PQ_WPTR);
3492                 cik_srbm_select(rdev, 0, 0, 0, 0);
3493                 mutex_unlock(&rdev->srbm_mutex);
3494         }
3495
3496         return wptr;
3497 }
3498
/**
 * cik_compute_ring_set_wptr - commit a new compute queue write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: compute ring to update
 *
 * Mirrors the new wptr into the writeback page, then rings the queue's
 * doorbell to notify the CP.
 */
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3505
3506 /**
3507  * cik_cp_compute_enable - enable/disable the compute CP MEs
3508  *
3509  * @rdev: radeon_device pointer
3510  * @enable: enable or disable the MEs
3511  *
3512  * Halts or unhalts the compute MEs.
3513  */
3514 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3515 {
3516         if (enable)
3517                 WREG32(CP_MEC_CNTL, 0);
3518         else
3519                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3520         udelay(50);
3521 }
3522
3523 /**
3524  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Loads the compute MEC1&2 ucode.
3529  * Returns 0 for success, -EINVAL if the ucode is not available.
3530  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MEs must be halted while their ucode is replaced */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 - only KV has a second MEC; it runs the same
		 * ucode image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3559
3560 /**
3561  * cik_cp_compute_start - start the compute queues
3562  *
3563  * @rdev: radeon_device pointer
3564  *
3565  * Enable the compute queues.
3566  * Returns 0 for success, error for failure.
3567  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* just un-halt the MECs; per-queue HQD setup is done in
	 * cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3574
3575 /**
3576  * cik_cp_compute_fini - stop the compute queues
3577  *
3578  * @rdev: radeon_device pointer
3579  *
3580  * Stop the compute queues and tear down the driver queue
3581  * info.
3582  */
3583 static void cik_cp_compute_fini(struct radeon_device *rdev)
3584 {
3585         int i, idx, r;
3586
3587         cik_cp_compute_enable(rdev, false);
3588
3589         for (i = 0; i < 2; i++) {
3590                 if (i == 0)
3591                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3592                 else
3593                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3594
3595                 if (rdev->ring[idx].mqd_obj) {
3596                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3597                         if (unlikely(r != 0))
3598                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3599
3600                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3601                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3602
3603                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3604                         rdev->ring[idx].mqd_obj = NULL;
3605                 }
3606         }
3607 }
3608
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
3624
3625 #define MEC_HPD_SIZE 2048
3626
3627 static int cik_mec_init(struct radeon_device *rdev)
3628 {
3629         int r;
3630         u32 *hpd;
3631
3632         /*
3633          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3634          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3635          */
3636         if (rdev->family == CHIP_KAVERI)
3637                 rdev->mec.num_mec = 2;
3638         else
3639                 rdev->mec.num_mec = 1;
3640         rdev->mec.num_pipe = 4;
3641         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3642
3643         if (rdev->mec.hpd_eop_obj == NULL) {
3644                 r = radeon_bo_create(rdev,
3645                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3646                                      PAGE_SIZE, true,
3647                                      RADEON_GEM_DOMAIN_GTT, NULL,
3648                                      &rdev->mec.hpd_eop_obj);
3649                 if (r) {
3650                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3651                         return r;
3652                 }
3653         }
3654
3655         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3656         if (unlikely(r != 0)) {
3657                 cik_mec_fini(rdev);
3658                 return r;
3659         }
3660         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3661                           &rdev->mec.hpd_eop_gpu_addr);
3662         if (r) {
3663                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3664                 cik_mec_fini(rdev);
3665                 return r;
3666         }
3667         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3668         if (r) {
3669                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3670                 cik_mec_fini(rdev);
3671                 return r;
3672         }
3673
3674         /* clear memory.  Not sure if this is required or not */
3675         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3676
3677         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3678         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3679
3680         return 0;
3681 }
3682
/* CPU-side shadow of one hardware queue's register state.  The field
 * names correspond one-to-one to the CP_MQD_*/CP_HQD_* registers
 * programmed in cik_cp_compute_resume(); the struct is embedded in the
 * MQD so the CP can save/restore queue state. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3721
/* Memory Queue Descriptor (MQD) for Bonaire-class compute queues.
 * One MQD bo per compute ring is filled out in cik_cp_compute_resume();
 * queue_state holds the saved HQD register values.
 * NOTE(review): the layout presumably must match what the MEC firmware
 * expects - do not reorder fields. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3749
3750 /**
3751  * cik_cp_compute_resume - setup the compute queue registers
3752  *
3753  * @rdev: radeon_device pointer
3754  *
3755  * Program the compute queues and test them to make sure they
3756  * are working.
3757  * Returns 0 for success, error for failure.
3758  */
3759 static int cik_cp_compute_resume(struct radeon_device *rdev)
3760 {
3761         int r, i, idx;
3762         u32 tmp;
3763         bool use_doorbell = true;
3764         u64 hqd_gpu_addr;
3765         u64 mqd_gpu_addr;
3766         u64 eop_gpu_addr;
3767         u64 wb_gpu_addr;
3768         u32 *buf;
3769         struct bonaire_mqd *mqd;
3770
3771         r = cik_cp_compute_start(rdev);
3772         if (r)
3773                 return r;
3774
3775         /* fix up chicken bits */
3776         tmp = RREG32(CP_CPF_DEBUG);
3777         tmp |= (1 << 23);
3778         WREG32(CP_CPF_DEBUG, tmp);
3779
3780         /* init the pipes */
3781         mutex_lock(&rdev->srbm_mutex);
3782         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3783                 int me = (i < 4) ? 1 : 2;
3784                 int pipe = (i < 4) ? i : (i - 4);
3785
3786                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3787
3788                 cik_srbm_select(rdev, me, pipe, 0, 0);
3789
3790                 /* write the EOP addr */
3791                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3792                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3793
3794                 /* set the VMID assigned */
3795                 WREG32(CP_HPD_EOP_VMID, 0);
3796
3797                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3798                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3799                 tmp &= ~EOP_SIZE_MASK;
3800                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3801                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3802         }
3803         cik_srbm_select(rdev, 0, 0, 0, 0);
3804         mutex_unlock(&rdev->srbm_mutex);
3805
3806         /* init the queues.  Just two for now. */
3807         for (i = 0; i < 2; i++) {
3808                 if (i == 0)
3809                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3810                 else
3811                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3812
3813                 if (rdev->ring[idx].mqd_obj == NULL) {
3814                         r = radeon_bo_create(rdev,
3815                                              sizeof(struct bonaire_mqd),
3816                                              PAGE_SIZE, true,
3817                                              RADEON_GEM_DOMAIN_GTT, NULL,
3818                                              &rdev->ring[idx].mqd_obj);
3819                         if (r) {
3820                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3821                                 return r;
3822                         }
3823                 }
3824
3825                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3826                 if (unlikely(r != 0)) {
3827                         cik_cp_compute_fini(rdev);
3828                         return r;
3829                 }
3830                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3831                                   &mqd_gpu_addr);
3832                 if (r) {
3833                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3834                         cik_cp_compute_fini(rdev);
3835                         return r;
3836                 }
3837                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3838                 if (r) {
3839                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3840                         cik_cp_compute_fini(rdev);
3841                         return r;
3842                 }
3843
3844                 /* doorbell offset */
3845                 rdev->ring[idx].doorbell_offset =
3846                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3847
3848                 /* init the mqd struct */
3849                 memset(buf, 0, sizeof(struct bonaire_mqd));
3850
3851                 mqd = (struct bonaire_mqd *)buf;
3852                 mqd->header = 0xC0310800;
3853                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3854                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3855                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3856                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3857
3858                 mutex_lock(&rdev->srbm_mutex);
3859                 cik_srbm_select(rdev, rdev->ring[idx].me,
3860                                 rdev->ring[idx].pipe,
3861                                 rdev->ring[idx].queue, 0);
3862
3863                 /* disable wptr polling */
3864                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3865                 tmp &= ~WPTR_POLL_EN;
3866                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3867
3868                 /* enable doorbell? */
3869                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3870                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3871                 if (use_doorbell)
3872                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3873                 else
3874                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3875                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3876                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3877
3878                 /* disable the queue if it's active */
3879                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3880                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3881                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3882                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3883                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3884                         for (i = 0; i < rdev->usec_timeout; i++) {
3885                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3886                                         break;
3887                                 udelay(1);
3888                         }
3889                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3890                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3891                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3892                 }
3893
3894                 /* set the pointer to the MQD */
3895                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3896                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3897                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3898                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3899                 /* set MQD vmid to 0 */
3900                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3901                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3902                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3903
3904                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3905                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3906                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3907                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3908                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3909                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3910
3911                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3912                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3913                 mqd->queue_state.cp_hqd_pq_control &=
3914                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3915
3916                 mqd->queue_state.cp_hqd_pq_control |=
3917                         order_base_2(rdev->ring[idx].ring_size / 8);
3918                 mqd->queue_state.cp_hqd_pq_control |=
3919                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3920 #ifdef __BIG_ENDIAN
3921                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3922 #endif
3923                 mqd->queue_state.cp_hqd_pq_control &=
3924                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3925                 mqd->queue_state.cp_hqd_pq_control |=
3926                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3927                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3928
3929                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3930                 if (i == 0)
3931                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3932                 else
3933                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3934                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3935                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3936                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3937                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3938                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3939
3940                 /* set the wb address wether it's enabled or not */
3941                 if (i == 0)
3942                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3943                 else
3944                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3945                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3946                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3947                         upper_32_bits(wb_gpu_addr) & 0xffff;
3948                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3949                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3950                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3951                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3952
3953                 /* enable the doorbell if requested */
3954                 if (use_doorbell) {
3955                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3956                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3957                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3958                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3959                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3960                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3961                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3962                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3963
3964                 } else {
3965                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3966                 }
3967                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3968                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3969
3970                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3971                 rdev->ring[idx].wptr = 0;
3972                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3973                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3974                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3975                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3976
3977                 /* set the vmid for the queue */
3978                 mqd->queue_state.cp_hqd_vmid = 0;
3979                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3980
3981                 /* activate the queue */
3982                 mqd->queue_state.cp_hqd_active = 1;
3983                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3984
3985                 cik_srbm_select(rdev, 0, 0, 0, 0);
3986                 mutex_unlock(&rdev->srbm_mutex);
3987
3988                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3989                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3990
3991                 rdev->ring[idx].ready = true;
3992                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3993                 if (r)
3994                         rdev->ring[idx].ready = false;
3995         }
3996
3997         return 0;
3998 }
3999
/* Enable/disable both the gfx and compute command processors together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4005
/* Load gfx CP ucode, then compute MEC ucode; stop at the first failure
 * and propagate its error code. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
4019
/* Tear down both the gfx ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4025
/* Bring up the whole CP: load ucode, then resume gfx and compute rings.
 * GUI idle interrupts are masked for the duration of the reprogramming. */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	/* NOTE(review): the early error returns below leave the GUI idle
	 * interrupt disabled; presumably callers treat a failed resume as
	 * fatal - confirm. */
	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4047
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used when diagnosing GPU hangs before a soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4087
4088 /**
4089  * cik_gpu_check_soft_reset - check which blocks are busy
4090  *
4091  * @rdev: radeon_device pointer
4092  *
4093  * Check which blocks are busy and return the relevant reset
4094  * mask to be used by cik_gpu_soft_reset().
4095  * Returns a mask of the blocks to be reset.
4096  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS - gfx pipeline blocks */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG - SDMA_IDLE is active-high idle, hence negated */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS - system blocks (IH, semaphores, GRBM, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4168
4169 /**
4170  * cik_gpu_soft_reset - soft reset GPU
4171  *
4172  * @rdev: radeon_device pointer
4173  * @reset_mask: mask of which blocks to reset
4174  *
4175  * Soft reset the blocks specified in @reset_mask.
4176  */
4177 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4178 {
4179         struct evergreen_mc_save save;
4180         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4181         u32 tmp;
4182
4183         if (reset_mask == 0)
4184                 return;
4185
4186         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4187
4188         cik_print_gpu_status_regs(rdev);
4189         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4190                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4191         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4192                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4193
4194         /* disable CG/PG */
4195         cik_fini_pg(rdev);
4196         cik_fini_cg(rdev);
4197
4198         /* stop the rlc */
4199         cik_rlc_stop(rdev);
4200
4201         /* Disable GFX parsing/prefetching */
4202         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4203
4204         /* Disable MEC parsing/prefetching */
4205         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4206
4207         if (reset_mask & RADEON_RESET_DMA) {
4208                 /* sdma0 */
4209                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4210                 tmp |= SDMA_HALT;
4211                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4212         }
4213         if (reset_mask & RADEON_RESET_DMA1) {
4214                 /* sdma1 */
4215                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4216                 tmp |= SDMA_HALT;
4217                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4218         }
4219
4220         evergreen_mc_stop(rdev, &save);
4221         if (evergreen_mc_wait_for_idle(rdev)) {
4222                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4223         }
4224
4225         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4226                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4227
4228         if (reset_mask & RADEON_RESET_CP) {
4229                 grbm_soft_reset |= SOFT_RESET_CP;
4230
4231                 srbm_soft_reset |= SOFT_RESET_GRBM;
4232         }
4233
4234         if (reset_mask & RADEON_RESET_DMA)
4235                 srbm_soft_reset |= SOFT_RESET_SDMA;
4236
4237         if (reset_mask & RADEON_RESET_DMA1)
4238                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4239
4240         if (reset_mask & RADEON_RESET_DISPLAY)
4241                 srbm_soft_reset |= SOFT_RESET_DC;
4242
4243         if (reset_mask & RADEON_RESET_RLC)
4244                 grbm_soft_reset |= SOFT_RESET_RLC;
4245
4246         if (reset_mask & RADEON_RESET_SEM)
4247                 srbm_soft_reset |= SOFT_RESET_SEM;
4248
4249         if (reset_mask & RADEON_RESET_IH)
4250                 srbm_soft_reset |= SOFT_RESET_IH;
4251
4252         if (reset_mask & RADEON_RESET_GRBM)
4253                 srbm_soft_reset |= SOFT_RESET_GRBM;
4254
4255         if (reset_mask & RADEON_RESET_VMC)
4256                 srbm_soft_reset |= SOFT_RESET_VMC;
4257
4258         if (!(rdev->flags & RADEON_IS_IGP)) {
4259                 if (reset_mask & RADEON_RESET_MC)
4260                         srbm_soft_reset |= SOFT_RESET_MC;
4261         }
4262
4263         if (grbm_soft_reset) {
4264                 tmp = RREG32(GRBM_SOFT_RESET);
4265                 tmp |= grbm_soft_reset;
4266                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4267                 WREG32(GRBM_SOFT_RESET, tmp);
4268                 tmp = RREG32(GRBM_SOFT_RESET);
4269
4270                 udelay(50);
4271
4272                 tmp &= ~grbm_soft_reset;
4273                 WREG32(GRBM_SOFT_RESET, tmp);
4274                 tmp = RREG32(GRBM_SOFT_RESET);
4275         }
4276
4277         if (srbm_soft_reset) {
4278                 tmp = RREG32(SRBM_SOFT_RESET);
4279                 tmp |= srbm_soft_reset;
4280                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4281                 WREG32(SRBM_SOFT_RESET, tmp);
4282                 tmp = RREG32(SRBM_SOFT_RESET);
4283
4284                 udelay(50);
4285
4286                 tmp &= ~srbm_soft_reset;
4287                 WREG32(SRBM_SOFT_RESET, tmp);
4288                 tmp = RREG32(SRBM_SOFT_RESET);
4289         }
4290
4291         /* Wait a little for things to settle down */
4292         udelay(50);
4293
4294         evergreen_mc_resume(rdev, &save);
4295         udelay(50);
4296
4297         cik_print_gpu_status_regs(rdev);
4298 }
4299
4300 /**
4301  * cik_asic_reset - soft reset GPU
4302  *
4303  * @rdev: radeon_device pointer
4304  *
4305  * Look up which blocks are hung and attempt
4306  * to reset them.
4307  * Returns 0 for success.
4308  */
4309 int cik_asic_reset(struct radeon_device *rdev)
4310 {
4311         u32 reset_mask;
4312
4313         reset_mask = cik_gpu_check_soft_reset(rdev);
4314
4315         if (reset_mask)
4316                 r600_set_bios_scratch_engine_hung(rdev, true);
4317
4318         cik_gpu_soft_reset(rdev, reset_mask);
4319
4320         reset_mask = cik_gpu_check_soft_reset(rdev);
4321
4322         if (!reset_mask)
4323                 r600_set_bios_scratch_engine_hung(rdev, false);
4324
4325         return 0;
4326 }
4327
4328 /**
4329  * cik_gfx_is_lockup - check if the 3D engine is locked up
4330  *
4331  * @rdev: radeon_device pointer
4332  * @ring: radeon_ring structure holding ring information
4333  *
4334  * Check if the 3D engine is locked up (CIK).
4335  * Returns true if the engine is locked, false if not.
4336  */
4337 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4338 {
4339         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4340
4341         if (!(reset_mask & (RADEON_RESET_GFX |
4342                             RADEON_RESET_COMPUTE |
4343                             RADEON_RESET_CP))) {
4344                 radeon_ring_lockup_update(ring);
4345                 return false;
4346         }
4347         /* force CP activities */
4348         radeon_ring_force_activity(rdev, ring);
4349         return radeon_ring_test_lockup(rdev, ring);
4350 }
4351
4352 /* MC */
4353 /**
4354  * cik_mc_program - program the GPU memory controller
4355  *
4356  * @rdev: radeon_device pointer
4357  *
4358  * Set the location of vram, gart, and AGP in the GPU's
4359  * physical address space (CIK).
4360  */
4361 static void cik_mc_program(struct radeon_device *rdev)
4362 {
4363         struct evergreen_mc_save save;
4364         u32 tmp;
4365         int i, j;
4366
4367         /* Initialize HDP */
4368         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4369                 WREG32((0x2c14 + j), 0x00000000);
4370                 WREG32((0x2c18 + j), 0x00000000);
4371                 WREG32((0x2c1c + j), 0x00000000);
4372                 WREG32((0x2c20 + j), 0x00000000);
4373                 WREG32((0x2c24 + j), 0x00000000);
4374         }
4375         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4376
4377         evergreen_mc_stop(rdev, &save);
4378         if (radeon_mc_wait_for_idle(rdev)) {
4379                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4380         }
4381         /* Lockout access through VGA aperture*/
4382         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4383         /* Update configuration */
4384         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4385                rdev->mc.vram_start >> 12);
4386         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4387                rdev->mc.vram_end >> 12);
4388         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4389                rdev->vram_scratch.gpu_addr >> 12);
4390         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4391         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4392         WREG32(MC_VM_FB_LOCATION, tmp);
4393         /* XXX double check these! */
4394         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4395         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4396         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4397         WREG32(MC_VM_AGP_BASE, 0);
4398         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4399         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4400         if (radeon_mc_wait_for_idle(rdev)) {
4401                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4402         }
4403         evergreen_mc_resume(rdev, &save);
4404         /* we need to own VRAM, so turn off the VGA renderer here
4405          * to stop it overwriting our objects */
4406         rv515_vga_render_disable(rdev);
4407 }
4408
4409 /**
4410  * cik_mc_init - initialize the memory controller driver params
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Look up the amount of vram, vram width, and decide how to place
4415  * vram and gart within the GPU's physical address space (CIK).
4416  * Returns 0 for success.
4417  */
4418 static int cik_mc_init(struct radeon_device *rdev)
4419 {
4420         u32 tmp;
4421         int chansize, numchan;
4422
4423         /* Get VRAM informations */
4424         rdev->mc.vram_is_ddr = true;
4425         tmp = RREG32(MC_ARB_RAMCFG);
4426         if (tmp & CHANSIZE_MASK) {
4427                 chansize = 64;
4428         } else {
4429                 chansize = 32;
4430         }
4431         tmp = RREG32(MC_SHARED_CHMAP);
4432         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4433         case 0:
4434         default:
4435                 numchan = 1;
4436                 break;
4437         case 1:
4438                 numchan = 2;
4439                 break;
4440         case 2:
4441                 numchan = 4;
4442                 break;
4443         case 3:
4444                 numchan = 8;
4445                 break;
4446         case 4:
4447                 numchan = 3;
4448                 break;
4449         case 5:
4450                 numchan = 6;
4451                 break;
4452         case 6:
4453                 numchan = 10;
4454                 break;
4455         case 7:
4456                 numchan = 12;
4457                 break;
4458         case 8:
4459                 numchan = 16;
4460                 break;
4461         }
4462         rdev->mc.vram_width = numchan * chansize;
4463         /* Could aper size report 0 ? */
4464         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4465         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4466         /* size in MB on si */
4467         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4468         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4469         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4470         si_vram_gtt_location(rdev, &rdev->mc);
4471         radeon_update_bandwidth_info(rdev);
4472
4473         return 0;
4474 }
4475
4476 /*
4477  * GART
4478  * VMID 0 is the physical GPU addresses as used by the kernel.
4479  * VMIDs 1-15 are used for userspace clients and are handled
4480  * by the radeon vm/hsa code.
4481  */
4482 /**
4483  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4484  *
4485  * @rdev: radeon_device pointer
4486  *
4487  * Flush the TLB for the VMID 0 page table (CIK).
4488  */
4489 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4490 {
4491         /* flush hdp cache */
4492         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4493
4494         /* bits 0-15 are the VM contexts0-15 */
4495         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4496 }
4497
4498 /**
4499  * cik_pcie_gart_enable - gart enable
4500  *
4501  * @rdev: radeon_device pointer
4502  *
4503  * This sets up the TLBs, programs the page tables for VMID0,
4504  * sets up the hw for VMIDs 1-15 which are allocated on
4505  * demand, and sets up the global locations for the LDS, GDS,
4506  * and GPUVM for FSA64 clients (CIK).
4507  * Returns 0 for success, errors for failure.
4508  */
4509 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4510 {
4511         int r, i;
4512
4513         if (rdev->gart.robj == NULL) {
4514                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4515                 return -EINVAL;
4516         }
4517         r = radeon_gart_table_vram_pin(rdev);
4518         if (r)
4519                 return r;
4520         radeon_gart_restore(rdev);
4521         /* Setup TLB control */
4522         WREG32(MC_VM_MX_L1_TLB_CNTL,
4523                (0xA << 7) |
4524                ENABLE_L1_TLB |
4525                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4526                ENABLE_ADVANCED_DRIVER_MODEL |
4527                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4528         /* Setup L2 cache */
4529         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4530                ENABLE_L2_FRAGMENT_PROCESSING |
4531                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4532                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4533                EFFECTIVE_L2_QUEUE_SIZE(7) |
4534                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4535         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4536         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4537                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4538         /* setup context0 */
4539         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4540         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4541         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4542         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4543                         (u32)(rdev->dummy_page.addr >> 12));
4544         WREG32(VM_CONTEXT0_CNTL2, 0);
4545         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4546                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4547
4548         WREG32(0x15D4, 0);
4549         WREG32(0x15D8, 0);
4550         WREG32(0x15DC, 0);
4551
4552         /* empty context1-15 */
4553         /* FIXME start with 4G, once using 2 level pt switch to full
4554          * vm size space
4555          */
4556         /* set vm size, must be a multiple of 4 */
4557         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4558         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4559         for (i = 1; i < 16; i++) {
4560                 if (i < 8)
4561                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4562                                rdev->gart.table_addr >> 12);
4563                 else
4564                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4565                                rdev->gart.table_addr >> 12);
4566         }
4567
4568         /* enable context1-15 */
4569         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4570                (u32)(rdev->dummy_page.addr >> 12));
4571         WREG32(VM_CONTEXT1_CNTL2, 4);
4572         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4573                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4575                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4577                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4579                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4581                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4582                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4583                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4584                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4585
4586         /* TC cache setup ??? */
4587         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4588         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4589         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4590
4591         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4592         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4593         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4594         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4595         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4596
4597         WREG32(TC_CFG_L1_VOLATILE, 0);
4598         WREG32(TC_CFG_L2_VOLATILE, 0);
4599
4600         if (rdev->family == CHIP_KAVERI) {
4601                 u32 tmp = RREG32(CHUB_CONTROL);
4602                 tmp &= ~BYPASS_VM;
4603                 WREG32(CHUB_CONTROL, tmp);
4604         }
4605
4606         /* XXX SH_MEM regs */
4607         /* where to put LDS, scratch, GPUVM in FSA64 space */
4608         mutex_lock(&rdev->srbm_mutex);
4609         for (i = 0; i < 16; i++) {
4610                 cik_srbm_select(rdev, 0, 0, 0, i);
4611                 /* CP and shaders */
4612                 WREG32(SH_MEM_CONFIG, 0);
4613                 WREG32(SH_MEM_APE1_BASE, 1);
4614                 WREG32(SH_MEM_APE1_LIMIT, 0);
4615                 WREG32(SH_MEM_BASES, 0);
4616                 /* SDMA GFX */
4617                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4618                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4619                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4620                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4621                 /* XXX SDMA RLC - todo */
4622         }
4623         cik_srbm_select(rdev, 0, 0, 0, 0);
4624         mutex_unlock(&rdev->srbm_mutex);
4625
4626         cik_pcie_gart_tlb_flush(rdev);
4627         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4628                  (unsigned)(rdev->mc.gtt_size >> 20),
4629                  (unsigned long long)rdev->gart.table_addr);
4630         rdev->gart.ready = true;
4631         return 0;
4632 }
4633
4634 /**
4635  * cik_pcie_gart_disable - gart disable
4636  *
4637  * @rdev: radeon_device pointer
4638  *
4639  * This disables all VM page table (CIK).
4640  */
4641 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4642 {
4643         /* Disable all tables */
4644         WREG32(VM_CONTEXT0_CNTL, 0);
4645         WREG32(VM_CONTEXT1_CNTL, 0);
4646         /* Setup TLB control */
4647         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4648                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4649         /* Setup L2 cache */
4650         WREG32(VM_L2_CNTL,
4651                ENABLE_L2_FRAGMENT_PROCESSING |
4652                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4653                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4654                EFFECTIVE_L2_QUEUE_SIZE(7) |
4655                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4656         WREG32(VM_L2_CNTL2, 0);
4657         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4658                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4659         radeon_gart_table_vram_unpin(rdev);
4660 }
4661
4662 /**
4663  * cik_pcie_gart_fini - vm fini callback
4664  *
4665  * @rdev: radeon_device pointer
4666  *
4667  * Tears down the driver GART/VM setup (CIK).
4668  */
4669 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4670 {
4671         cik_pcie_gart_disable(rdev);
4672         radeon_gart_table_vram_free(rdev);
4673         radeon_gart_fini(rdev);
4674 }
4675
4676 /* vm parser */
4677 /**
4678  * cik_ib_parse - vm ib_parse callback
4679  *
4680  * @rdev: radeon_device pointer
4681  * @ib: indirect buffer pointer
4682  *
4683  * CIK uses hw IB checking so this is a nop (CIK).
4684  */
4685 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4686 {
4687         return 0;
4688 }
4689
4690 /*
4691  * vm
4692  * VMID 0 is the physical GPU addresses as used by the kernel.
4693  * VMIDs 1-15 are used for userspace clients and are handled
4694  * by the radeon vm/hsa code.
4695  */
4696 /**
4697  * cik_vm_init - cik vm init callback
4698  *
4699  * @rdev: radeon_device pointer
4700  *
4701  * Inits cik specific vm parameters (number of VMs, base of vram for
4702  * VMIDs 1-15) (CIK).
4703  * Returns 0 for success.
4704  */
4705 int cik_vm_init(struct radeon_device *rdev)
4706 {
4707         /* number of VMs */
4708         rdev->vm_manager.nvm = 16;
4709         /* base offset of vram pages */
4710         if (rdev->flags & RADEON_IS_IGP) {
4711                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4712                 tmp <<= 22;
4713                 rdev->vm_manager.vram_base_offset = tmp;
4714         } else
4715                 rdev->vm_manager.vram_base_offset = 0;
4716
4717         return 0;
4718 }
4719
4720 /**
4721  * cik_vm_fini - cik vm fini callback
4722  *
4723  * @rdev: radeon_device pointer
4724  *
4725  * Tear down any asic specific VM setup (CIK).
4726  */
4727 void cik_vm_fini(struct radeon_device *rdev)
4728 {
4729 }
4730
4731 /**
4732  * cik_vm_decode_fault - print human readable fault info
4733  *
4734  * @rdev: radeon_device pointer
4735  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4736  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4737  *
4738  * Print human readable fault information (CIK).
4739  */
4740 static void cik_vm_decode_fault(struct radeon_device *rdev,
4741                                 u32 status, u32 addr, u32 mc_client)
4742 {
4743         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4744         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4745         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4746         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4747                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4748
4749         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4750                protections, vmid, addr,
4751                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4752                block, mc_client, mc_id);
4753 }
4754
4755 /**
4756  * cik_vm_flush - cik vm flush using the CP
4757  *
4758  * @rdev: radeon_device pointer
4759  *
4760  * Update the page table base and flush the VM TLB
4761  * using the CP (CIK).
4762  */
4763 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4764 {
4765         struct radeon_ring *ring = &rdev->ring[ridx];
4766
4767         if (vm == NULL)
4768                 return;
4769
4770         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4771         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4772                                  WRITE_DATA_DST_SEL(0)));
4773         if (vm->id < 8) {
4774                 radeon_ring_write(ring,
4775                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4776         } else {
4777                 radeon_ring_write(ring,
4778                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4779         }
4780         radeon_ring_write(ring, 0);
4781         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4782
4783         /* update SH_MEM_* regs */
4784         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4785         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4786                                  WRITE_DATA_DST_SEL(0)));
4787         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4788         radeon_ring_write(ring, 0);
4789         radeon_ring_write(ring, VMID(vm->id));
4790
4791         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4792         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793                                  WRITE_DATA_DST_SEL(0)));
4794         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4795         radeon_ring_write(ring, 0);
4796
4797         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4798         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4799         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4800         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4801
4802         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4803         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4804                                  WRITE_DATA_DST_SEL(0)));
4805         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4806         radeon_ring_write(ring, 0);
4807         radeon_ring_write(ring, VMID(0));
4808
4809         /* HDP flush */
4810         /* We should be using the WAIT_REG_MEM packet here like in
4811          * cik_fence_ring_emit(), but it causes the CP to hang in this
4812          * context...
4813          */
4814         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4815         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4816                                  WRITE_DATA_DST_SEL(0)));
4817         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4818         radeon_ring_write(ring, 0);
4819         radeon_ring_write(ring, 0);
4820
4821         /* bits 0-15 are the VM contexts0-15 */
4822         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4823         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4824                                  WRITE_DATA_DST_SEL(0)));
4825         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4826         radeon_ring_write(ring, 0);
4827         radeon_ring_write(ring, 1 << vm->id);
4828
4829         /* compute doesn't have PFP */
4830         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4831                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4832                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4833                 radeon_ring_write(ring, 0x0);
4834         }
4835 }
4836
4837 /**
4838  * cik_vm_set_page - update the page tables using sDMA
4839  *
4840  * @rdev: radeon_device pointer
4841  * @ib: indirect buffer to fill with commands
4842  * @pe: addr of the page entry
4843  * @addr: dst addr to write into pe
4844  * @count: number of page entries to update
4845  * @incr: increase next addr by incr bytes
4846  * @flags: access flags
4847  *
4848  * Update the page tables using CP or sDMA (CIK).
4849  */
4850 void cik_vm_set_page(struct radeon_device *rdev,
4851                      struct radeon_ib *ib,
4852                      uint64_t pe,
4853                      uint64_t addr, unsigned count,
4854                      uint32_t incr, uint32_t flags)
4855 {
4856         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4857         uint64_t value;
4858         unsigned ndw;
4859
4860         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4861                 /* CP */
4862                 while (count) {
4863                         ndw = 2 + count * 2;
4864                         if (ndw > 0x3FFE)
4865                                 ndw = 0x3FFE;
4866
4867                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4868                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4869                                                     WRITE_DATA_DST_SEL(1));
4870                         ib->ptr[ib->length_dw++] = pe;
4871                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4872                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4873                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4874                                         value = radeon_vm_map_gart(rdev, addr);
4875                                         value &= 0xFFFFFFFFFFFFF000ULL;
4876                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4877                                         value = addr;
4878                                 } else {
4879                                         value = 0;
4880                                 }
4881                                 addr += incr;
4882                                 value |= r600_flags;
4883                                 ib->ptr[ib->length_dw++] = value;
4884                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4885                         }
4886                 }
4887         } else {
4888                 /* DMA */
4889                 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4890         }
4891 }
4892
4893 /*
4894  * RLC
4895  * The RLC is a multi-purpose microengine that handles a
4896  * variety of functions, the most important of which is
4897  * the interrupt controller.
4898  */
4899 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4900                                           bool enable)
4901 {
4902         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4903
4904         if (enable)
4905                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4906         else
4907                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4908         WREG32(CP_INT_CNTL_RING0, tmp);
4909 }
4910
4911 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4912 {
4913         u32 tmp;
4914
4915         tmp = RREG32(RLC_LB_CNTL);
4916         if (enable)
4917                 tmp |= LOAD_BALANCE_ENABLE;
4918         else
4919                 tmp &= ~LOAD_BALANCE_ENABLE;
4920         WREG32(RLC_LB_CNTL, tmp);
4921 }
4922
/* Wait for the RLC serdes (CU masters per SE/SH, then the non-CU
 * masters) to go idle, bounded by rdev->usec_timeout per poll loop. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll the CU master busy status for every SE/SH combination */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to idle as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4947
4948 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4949 {
4950         u32 tmp;
4951
4952         tmp = RREG32(RLC_CNTL);
4953         if (tmp != rlc)
4954                 WREG32(RLC_CNTL, rlc);
4955 }
4956
/* Halt the RLC if it is currently enabled and wait for it to go idle
 * (GPM no longer busy, SERDES masters idle).  Returns the original
 * RLC_CNTL value so the caller can restore it via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                u32 i;

                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                /* wait for the RLC GPM microengine to go idle */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
                                break;
                        udelay(1);
                }

                cik_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
4980
/* Request the RLC enter safe mode: write the request message to
 * RLC_GPR_REG2, poll until GFX power/clock status bits are both set,
 * then poll until the RLC clears the REQ bit (acknowledge).  Both
 * polls time out after rdev->usec_timeout microseconds. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* wait for the RLC to acknowledge the request (REQ cleared) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
5001
5002 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5003 {
5004         u32 tmp;
5005
5006         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5007         WREG32(RLC_GPR_REG2, tmp);
5008 }
5009
5010 /**
5011  * cik_rlc_stop - stop the RLC ME
5012  *
5013  * @rdev: radeon_device pointer
5014  *
5015  * Halt the RLC ME (MicroEngine) (CIK).
5016  */
5017 static void cik_rlc_stop(struct radeon_device *rdev)
5018 {
5019         WREG32(RLC_CNTL, 0);
5020
5021         cik_enable_gui_idle_interrupt(rdev, false);
5022
5023         cik_wait_for_rlc_serdes(rdev);
5024 }
5025
5026 /**
5027  * cik_rlc_start - start the RLC ME
5028  *
5029  * @rdev: radeon_device pointer
5030  *
5031  * Unhalt the RLC ME (MicroEngine) (CIK).
5032  */
5033 static void cik_rlc_start(struct radeon_device *rdev)
5034 {
5035         WREG32(RLC_CNTL, RLC_ENABLE);
5036
5037         cik_enable_gui_idle_interrupt(rdev, true);
5038
5039         udelay(50);
5040 }
5041
5042 /**
5043  * cik_rlc_resume - setup the RLC hw
5044  *
5045  * @rdev: radeon_device pointer
5046  *
5047  * Initialize the RLC registers, load the ucode,
5048  * and start the RLC (CIK).
5049  * Returns 0 for success, -EINVAL if the ucode is not available.
5050  */
5051 static int cik_rlc_resume(struct radeon_device *rdev)
5052 {
5053         u32 i, size, tmp;
5054         const __be32 *fw_data;
5055
5056         if (!rdev->rlc_fw)
5057                 return -EINVAL;
5058
5059         switch (rdev->family) {
5060         case CHIP_BONAIRE:
5061         default:
5062                 size = BONAIRE_RLC_UCODE_SIZE;
5063                 break;
5064         case CHIP_KAVERI:
5065                 size = KV_RLC_UCODE_SIZE;
5066                 break;
5067         case CHIP_KABINI:
5068                 size = KB_RLC_UCODE_SIZE;
5069                 break;
5070         }
5071
5072         cik_rlc_stop(rdev);
5073
5074         /* disable CG */
5075         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5076         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5077
5078         si_rlc_reset(rdev);
5079
5080         cik_init_pg(rdev);
5081
5082         cik_init_cg(rdev);
5083
5084         WREG32(RLC_LB_CNTR_INIT, 0);
5085         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5086
5087         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5088         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5089         WREG32(RLC_LB_PARAMS, 0x00600408);
5090         WREG32(RLC_LB_CNTL, 0x80000004);
5091
5092         WREG32(RLC_MC_CNTL, 0);
5093         WREG32(RLC_UCODE_CNTL, 0);
5094
5095         fw_data = (const __be32 *)rdev->rlc_fw->data;
5096                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5097         for (i = 0; i < size; i++)
5098                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5099         WREG32(RLC_GPM_UCODE_ADDR, 0);
5100
5101         /* XXX - find out what chips support lbpw */
5102         cik_enable_lbpw(rdev, false);
5103
5104         if (rdev->family == CHIP_BONAIRE)
5105                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5106
5107         cik_rlc_start(rdev);
5108
5109         return 0;
5110 }
5111
/* Enable/disable GFX coarse grained clock gating (CGCG) and clock
 * gating light sleep (CGLS).  The override state is broadcast to all
 * CUs through the RLC SERDES while the RLC is halted; cik_halt_rlc()
 * returns the prior RLC_CNTL value which is restored afterwards via
 * cik_update_rlc(). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the CGCG/CGLS override to all CUs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);

                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* four back-to-back reads - presumably to flush/settle CB
                 * clock state before disabling gating; TODO confirm intent */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5147
/* Enable/disable GFX medium grained clock gating (MGCG) and the related
 * memory light sleep (MGLS/CP_LS) and CGTS features, as permitted by
 * rdev->cg_flags.  The MGCG override is broadcast to all CUs through the
 * RLC SERDES while the RLC is halted (cik_halt_rlc() saves RLC_CNTL,
 * cik_update_rlc() restores it). */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* clear the MGCG override (bit 1) */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the override to all CUs */
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* set the MGCG override (bit 1) */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000002;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* turn off RLC and CP memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);
        }
}
5226
/* Memory controller registers that share the MC_LS_ENABLE/MC_CG_ENABLE
 * bit layout; iterated by cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5239
5240 static void cik_enable_mc_ls(struct radeon_device *rdev,
5241                              bool enable)
5242 {
5243         int i;
5244         u32 orig, data;
5245
5246         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5247                 orig = data = RREG32(mc_cg_registers[i]);
5248                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5249                         data |= MC_LS_ENABLE;
5250                 else
5251                         data &= ~MC_LS_ENABLE;
5252                 if (data != orig)
5253                         WREG32(mc_cg_registers[i], data);
5254         }
5255 }
5256
5257 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5258                                bool enable)
5259 {
5260         int i;
5261         u32 orig, data;
5262
5263         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5264                 orig = data = RREG32(mc_cg_registers[i]);
5265                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5266                         data |= MC_CG_ENABLE;
5267                 else
5268                         data &= ~MC_CG_ENABLE;
5269                 if (data != orig)
5270                         WREG32(mc_cg_registers[i], data);
5271         }
5272 }
5273
5274 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5275                                  bool enable)
5276 {
5277         u32 orig, data;
5278
5279         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5280                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5281                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5282         } else {
5283                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5284                 data |= 0xff000000;
5285                 if (data != orig)
5286                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5287
5288                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5289                 data |= 0xff000000;
5290                 if (data != orig)
5291                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5292         }
5293 }
5294
5295 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5296                                  bool enable)
5297 {
5298         u32 orig, data;
5299
5300         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5301                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5302                 data |= 0x100;
5303                 if (orig != data)
5304                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5305
5306                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5307                 data |= 0x100;
5308                 if (orig != data)
5309                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5310         } else {
5311                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5312                 data &= ~0x100;
5313                 if (orig != data)
5314                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5315
5316                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5317                 data &= ~0x100;
5318                 if (orig != data)
5319                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5320         }
5321 }
5322
/* Enable/disable UVD medium grained clock gating.
 * NOTE(review): on the enable path the value read from UVD_CGC_MEM_CTRL
 * is immediately discarded by "data = 0xfff"; possibly "data |= 0xfff"
 * was intended - confirm against the UVD register documentation. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
5348
5349 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5350                                bool enable)
5351 {
5352         u32 orig, data;
5353
5354         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5355
5356         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5357                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5358                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5359         else
5360                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5361                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5362
5363         if (orig != data)
5364                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5365 }
5366
5367 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5368                                 bool enable)
5369 {
5370         u32 orig, data;
5371
5372         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5373
5374         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5375                 data &= ~CLOCK_GATING_DIS;
5376         else
5377                 data |= CLOCK_GATING_DIS;
5378
5379         if (orig != data)
5380                 WREG32(HDP_HOST_PATH_CNTL, data);
5381 }
5382
5383 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5384                               bool enable)
5385 {
5386         u32 orig, data;
5387
5388         orig = data = RREG32(HDP_MEM_POWER_LS);
5389
5390         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5391                 data |= HDP_LS_ENABLE;
5392         else
5393                 data &= ~HDP_LS_ENABLE;
5394
5395         if (orig != data)
5396                 WREG32(HDP_MEM_POWER_LS, data);
5397 }
5398
/* Enable/disable clock gating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* flags).  For GFX the GUI idle interrupt
 * is masked around the update and MGCG/CGCG are sequenced so that MGCG
 * is enabled before CGCG and disabled after it. */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                /* MC clock gating only applies to discrete parts */
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }
}
5442
/* Enable clock gating for all supported blocks.  GFX is enabled first,
 * then (after UVD internal CG setup when present) the remaining blocks
 * in one batch. */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
5457
/* Disable clock gating for all blocks; the non-GFX blocks are torn
 * down first, GFX last (the reverse of cik_init_cg()). */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5468
5469 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5470                                           bool enable)
5471 {
5472         u32 data, orig;
5473
5474         orig = data = RREG32(RLC_PG_CNTL);
5475         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5476                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5477         else
5478                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5479         if (orig != data)
5480                 WREG32(RLC_PG_CNTL, data);
5481 }
5482
5483 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5484                                           bool enable)
5485 {
5486         u32 data, orig;
5487
5488         orig = data = RREG32(RLC_PG_CNTL);
5489         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5490                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5491         else
5492                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5493         if (orig != data)
5494                 WREG32(RLC_PG_CNTL, data);
5495 }
5496
5497 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5498 {
5499         u32 data, orig;
5500
5501         orig = data = RREG32(RLC_PG_CNTL);
5502         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5503                 data &= ~DISABLE_CP_PG;
5504         else
5505                 data |= DISABLE_CP_PG;
5506         if (orig != data)
5507                 WREG32(RLC_PG_CNTL, data);
5508 }
5509
5510 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5511 {
5512         u32 data, orig;
5513
5514         orig = data = RREG32(RLC_PG_CNTL);
5515         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5516                 data &= ~DISABLE_GDS_PG;
5517         else
5518                 data |= DISABLE_GDS_PG;
5519         if (orig != data)
5520                 WREG32(RLC_PG_CNTL, data);
5521 }
5522
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the per-microengine power-gating tables out of the CP ucode
 * images (CE, PFP, ME, then MEC(s)) into the RLC cp_table buffer.
 * KAVERI uses 5 MEs (max_me = 5); all other families use 4. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset;

        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (me == 0) {
                        fw_data = (const __be32 *)rdev->ce_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 1) {
                        fw_data = (const __be32 *)rdev->pfp_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else if (me == 2) {
                        fw_data = (const __be32 *)rdev->me_fw->data;
                        table_offset = CP_ME_TABLE_OFFSET;
                } else {
                        /* me >= 3: MEC ucode(s) use a different table offset */
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        table_offset = CP_MEC_TABLE_OFFSET;
                }

                for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
                        dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
                }
                bo_offset += CP_ME_TABLE_SIZE;
        }
}
5564
/* Enable/disable GFX coarse grained power gating together with the
 * RLC automatic power-gating mechanism. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* value is discarded - presumably a read to flush/settle
                 * state after disabling PG; TODO confirm intent */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
5594
5595 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5596 {
5597         u32 mask = 0, tmp, tmp1;
5598         int i;
5599
5600         cik_select_se_sh(rdev, se, sh);
5601         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5602         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5603         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5604
5605         tmp &= 0xffff0000;
5606
5607         tmp |= tmp1;
5608         tmp >>= 16;
5609
5610         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5611                 mask <<= 1;
5612                 mask |= 1;
5613         }
5614
5615         return (~tmp) & mask;
5616 }
5617
5618 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5619 {
5620         u32 i, j, k, active_cu_number = 0;
5621         u32 mask, counter, cu_bitmap;
5622         u32 tmp = 0;
5623
5624         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5625                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5626                         mask = 1;
5627                         cu_bitmap = 0;
5628                         counter = 0;
5629                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5630                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5631                                         if (counter < 2)
5632                                                 cu_bitmap |= mask;
5633                                         counter ++;
5634                                 }
5635                                 mask <<= 1;
5636                         }
5637
5638                         active_cu_number += counter;
5639                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5640                 }
5641         }
5642
5643         WREG32(RLC_PG_AO_CU_MASK, tmp);
5644
5645         tmp = RREG32(RLC_MAX_PG_CU);
5646         tmp &= ~MAX_PU_CU_MASK;
5647         tmp |= MAX_PU_CU(active_cu_number);
5648         WREG32(RLC_MAX_PG_CU, tmp);
5649 }
5650
5651 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5652                                        bool enable)
5653 {
5654         u32 data, orig;
5655
5656         orig = data = RREG32(RLC_PG_CNTL);
5657         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5658                 data |= STATIC_PER_CU_PG_ENABLE;
5659         else
5660                 data &= ~STATIC_PER_CU_PG_ENABLE;
5661         if (orig != data)
5662                 WREG32(RLC_PG_CNTL, data);
5663 }
5664
5665 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5666                                         bool enable)
5667 {
5668         u32 data, orig;
5669
5670         orig = data = RREG32(RLC_PG_CNTL);
5671         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5672                 data |= DYN_PER_CU_PG_ENABLE;
5673         else
5674                 data &= ~DYN_PER_CU_PG_ENABLE;
5675         if (orig != data)
5676                 WREG32(RLC_PG_CNTL, data);
5677 }
5678
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Set up the RLC for GFX coarse grained power gating: write the clear
 * state descriptor (address + size, or zeros if no cs_data) and the
 * save/restore register list into RLC GPM scratch RAM, point the RLC at
 * the save/restore and cp table buffers, and program the poll/delay
 * timing parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state: write a zeroed descriptor */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
5730
/* Enable/disable all GFX power-gating modes: coarse grained plus
 * static and dynamic per-CU gating. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5737
5738 u32 cik_get_csb_size(struct radeon_device *rdev)
5739 {
5740         u32 count = 0;
5741         const struct cs_section_def *sect = NULL;
5742         const struct cs_extent_def *ext = NULL;
5743
5744         if (rdev->rlc.cs_data == NULL)
5745                 return 0;
5746
5747         /* begin clear state */
5748         count += 2;
5749         /* context control state */
5750         count += 3;
5751
5752         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5753                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5754                         if (sect->id == SECT_CONTEXT)
5755                                 count += 2 + ext->reg_count;
5756                         else
5757                                 return 0;
5758                 }
5759         }
5760         /* pa_sc_raster_config/pa_sc_raster_config1 */
5761         count += 4;
5762         /* end clear state */
5763         count += 2;
5764         /* clear state */
5765         count += 2;
5766
5767         return count;
5768 }
5769
/* Fill @buffer with the clear state indirect buffer: preamble begin,
 * context control, the SECT_CONTEXT register extents from cs_data, a
 * family-specific PA_SC_RASTER_CONFIG pair, preamble end, and a final
 * CLEAR_STATE packet.  @buffer must be at least cik_get_csb_size()
 * dwords; does nothing if cs_data or buffer is NULL. */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
        buffer[count++] = 0x80000000;
        buffer[count++] = 0x80000000;

        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
                                buffer[count++] = ext->reg_index - 0xa000;
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = ext->extent[i];
                        } else {
                                /* only context sections are supported */
                                return;
                        }
                }
        }

        /* pa_sc_raster_config/pa_sc_raster_config1 (family specific) */
        buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
        buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
        switch (rdev->family) {
        case CHIP_BONAIRE:
                buffer[count++] = 0x16000012;
                buffer[count++] = 0x00000000;
                break;
        case CHIP_KAVERI:
                buffer[count++] = 0x00000000; /* XXX */
                buffer[count++] = 0x00000000;
                break;
        case CHIP_KABINI:
                buffer[count++] = 0x00000000; /* XXX */
                buffer[count++] = 0x00000000;
                break;
        default:
                buffer[count++] = 0x00000000;
                buffer[count++] = 0x00000000;
                break;
        }

        buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
        buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

        buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
        buffer[count++] = 0;
}
5828
5829 static void cik_init_pg(struct radeon_device *rdev)
5830 {
5831         if (rdev->pg_flags) {
5832                 cik_enable_sck_slowdown_on_pu(rdev, true);
5833                 cik_enable_sck_slowdown_on_pd(rdev, true);
5834                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5835                         cik_init_gfx_cgpg(rdev);
5836                         cik_enable_cp_pg(rdev, true);
5837                         cik_enable_gds_pg(rdev, true);
5838                 }
5839                 cik_init_ao_cu_mask(rdev);
5840                 cik_update_gfx_pg(rdev, true);
5841         }
5842 }
5843
5844 static void cik_fini_pg(struct radeon_device *rdev)
5845 {
5846         if (rdev->pg_flags) {
5847                 cik_update_gfx_pg(rdev, false);
5848                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5849                         cik_enable_cp_pg(rdev, false);
5850                         cik_enable_gds_pg(rdev, false);
5851                 }
5852         }
5853 }
5854
5855 /*
5856  * Interrupts
5857  * Starting with r6xx, interrupts are handled via a ring buffer.
5858  * Ring buffers are areas of GPU accessible memory that the GPU
5859  * writes interrupt vectors into and the host reads vectors out of.
5860  * There is a rptr (read pointer) that determines where the
5861  * host is currently reading, and a wptr (write pointer)
5862  * which determines where the GPU has written.  When the
5863  * pointers are equal, the ring is idle.  When the GPU
5864  * writes vectors to the ring buffer, it increments the
5865  * wptr.  When there is an interrupt, the host then starts
5866  * fetching commands and processing them until the pointers are
5867  * equal again at which point it updates the rptr.
5868  */
5869
5870 /**
5871  * cik_enable_interrupts - Enable the interrupt ring buffer
5872  *
5873  * @rdev: radeon_device pointer
5874  *
5875  * Enable the interrupt ring buffer (CIK).
5876  */
5877 static void cik_enable_interrupts(struct radeon_device *rdev)
5878 {
5879         u32 ih_cntl = RREG32(IH_CNTL);
5880         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5881
5882         ih_cntl |= ENABLE_INTR;
5883         ih_rb_cntl |= IH_RB_ENABLE;
5884         WREG32(IH_CNTL, ih_cntl);
5885         WREG32(IH_RB_CNTL, ih_rb_cntl);
5886         rdev->ih.enabled = true;
5887 }
5888
5889 /**
5890  * cik_disable_interrupts - Disable the interrupt ring buffer
5891  *
5892  * @rdev: radeon_device pointer
5893  *
5894  * Disable the interrupt ring buffer (CIK).
5895  */
5896 static void cik_disable_interrupts(struct radeon_device *rdev)
5897 {
5898         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5899         u32 ih_cntl = RREG32(IH_CNTL);
5900
5901         ih_rb_cntl &= ~IH_RB_ENABLE;
5902         ih_cntl &= ~ENABLE_INTR;
5903         WREG32(IH_RB_CNTL, ih_rb_cntl);
5904         WREG32(IH_CNTL, ih_cntl);
5905         /* set rptr, wptr to 0 */
5906         WREG32(IH_RB_RPTR, 0);
5907         WREG32(IH_RB_WPTR, 0);
5908         rdev->ih.enabled = false;
5909         rdev->ih.rptr = 0;
5910 }
5911
5912 /**
5913  * cik_disable_interrupt_state - Disable all interrupt sources
5914  *
5915  * @rdev: radeon_device pointer
5916  *
5917  * Clear all interrupt enable bits used by the driver (CIK).
5918  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: clear everything except the context busy/empty
	 * enables, which are not managed by this function */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both SDMA engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: clear all per-pipe enables on ME1 and ME2 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — mask per-crtc display interrupts for
	 * every crtc this asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit so hotplug
	 * detection settings survive; clear enable/ack/etc. */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
5973
5974 /**
5975  * cik_irq_init - init and enable the interrupt ring
5976  *
5977  * @rdev: radeon_device pointer
5978  *
5979  * Allocate a ring buffer for the interrupt controller,
5980  * enable the RLC, disable interrupts, enable the IH
5981  * ring buffer and enable it (CIK).
 * Called at device load and resume.
5983  * Returns 0 for success, errors for failure.
5984  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reprogram the controller */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* required for MSI delivery — NOTE(review): also done by core PCI
	 * code in some paths; harmless to repeat here */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6055
6056 /**
6057  * cik_irq_set - enable/disable interrupt sources
6058  *
6059  * @rdev: radeon_device pointer
6060  *
6061  * Enable interrupt sources on the GPU (vblanks, hpd,
6062  * etc.) (CIK).
6063  * Returns 0 for success, errors for failure.
6064  */
6065 int cik_irq_set(struct radeon_device *rdev)
6066 {
6067         u32 cp_int_cntl;
6068         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6069         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6070         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6071         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6072         u32 grbm_int_cntl = 0;
6073         u32 dma_cntl, dma_cntl1;
6074         u32 thermal_int;
6075
6076         if (!rdev->irq.installed) {
6077                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6078                 return -EINVAL;
6079         }
6080         /* don't enable anything if the ih is disabled */
6081         if (!rdev->ih.enabled) {
6082                 cik_disable_interrupts(rdev);
6083                 /* force the active interrupt state to all disabled */
6084                 cik_disable_interrupt_state(rdev);
6085                 return 0;
6086         }
6087
6088         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6089                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6090         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6091
6092         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6094         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6095         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6096         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6097         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6098
6099         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6100         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6101
6102         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6106         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6107         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6108         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6109         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6110
6111         if (rdev->flags & RADEON_IS_IGP)
6112                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6113                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6114         else
6115                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6116                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6117
6118         /* enable CP interrupts on all rings */
6119         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6120                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6121                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6122         }
6123         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6124                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6125                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6126                 if (ring->me == 1) {
6127                         switch (ring->pipe) {
6128                         case 0:
6129                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6130                                 break;
6131                         case 1:
6132                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6133                                 break;
6134                         case 2:
6135                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6136                                 break;
6137                         case 3:
6138                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6139                                 break;
6140                         default:
6141                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6142                                 break;
6143                         }
6144                 } else if (ring->me == 2) {
6145                         switch (ring->pipe) {
6146                         case 0:
6147                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6148                                 break;
6149                         case 1:
6150                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6151                                 break;
6152                         case 2:
6153                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6154                                 break;
6155                         case 3:
6156                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6157                                 break;
6158                         default:
6159                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6160                                 break;
6161                         }
6162                 } else {
6163                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6164                 }
6165         }
6166         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6167                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6168                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6169                 if (ring->me == 1) {
6170                         switch (ring->pipe) {
6171                         case 0:
6172                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6173                                 break;
6174                         case 1:
6175                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6176                                 break;
6177                         case 2:
6178                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6179                                 break;
6180                         case 3:
6181                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6182                                 break;
6183                         default:
6184                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6185                                 break;
6186                         }
6187                 } else if (ring->me == 2) {
6188                         switch (ring->pipe) {
6189                         case 0:
6190                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6191                                 break;
6192                         case 1:
6193                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6194                                 break;
6195                         case 2:
6196                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6197                                 break;
6198                         case 3:
6199                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6200                                 break;
6201                         default:
6202                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6203                                 break;
6204                         }
6205                 } else {
6206                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6207                 }
6208         }
6209
6210         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6211                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6212                 dma_cntl |= TRAP_ENABLE;
6213         }
6214
6215         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6216                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6217                 dma_cntl1 |= TRAP_ENABLE;
6218         }
6219
6220         if (rdev->irq.crtc_vblank_int[0] ||
6221             atomic_read(&rdev->irq.pflip[0])) {
6222                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6223                 crtc1 |= VBLANK_INTERRUPT_MASK;
6224         }
6225         if (rdev->irq.crtc_vblank_int[1] ||
6226             atomic_read(&rdev->irq.pflip[1])) {
6227                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6228                 crtc2 |= VBLANK_INTERRUPT_MASK;
6229         }
6230         if (rdev->irq.crtc_vblank_int[2] ||
6231             atomic_read(&rdev->irq.pflip[2])) {
6232                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6233                 crtc3 |= VBLANK_INTERRUPT_MASK;
6234         }
6235         if (rdev->irq.crtc_vblank_int[3] ||
6236             atomic_read(&rdev->irq.pflip[3])) {
6237                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6238                 crtc4 |= VBLANK_INTERRUPT_MASK;
6239         }
6240         if (rdev->irq.crtc_vblank_int[4] ||
6241             atomic_read(&rdev->irq.pflip[4])) {
6242                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6243                 crtc5 |= VBLANK_INTERRUPT_MASK;
6244         }
6245         if (rdev->irq.crtc_vblank_int[5] ||
6246             atomic_read(&rdev->irq.pflip[5])) {
6247                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6248                 crtc6 |= VBLANK_INTERRUPT_MASK;
6249         }
6250         if (rdev->irq.hpd[0]) {
6251                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6252                 hpd1 |= DC_HPDx_INT_EN;
6253         }
6254         if (rdev->irq.hpd[1]) {
6255                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6256                 hpd2 |= DC_HPDx_INT_EN;
6257         }
6258         if (rdev->irq.hpd[2]) {
6259                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6260                 hpd3 |= DC_HPDx_INT_EN;
6261         }
6262         if (rdev->irq.hpd[3]) {
6263                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6264                 hpd4 |= DC_HPDx_INT_EN;
6265         }
6266         if (rdev->irq.hpd[4]) {
6267                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6268                 hpd5 |= DC_HPDx_INT_EN;
6269         }
6270         if (rdev->irq.hpd[5]) {
6271                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6272                 hpd6 |= DC_HPDx_INT_EN;
6273         }
6274
6275         if (rdev->irq.dpm_thermal) {
6276                 DRM_DEBUG("dpm thermal\n");
6277                 if (rdev->flags & RADEON_IS_IGP)
6278                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6279                 else
6280                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6281         }
6282
6283         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6284
6285         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6286         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6287
6288         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6289         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6290         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6291         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6292         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6293         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6294         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6295         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6296
6297         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6298
6299         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6300         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6301         if (rdev->num_crtc >= 4) {
6302                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6303                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6304         }
6305         if (rdev->num_crtc >= 6) {
6306                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6307                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6308         }
6309
6310         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6311         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6312         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6313         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6314         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6315         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6316
6317         if (rdev->flags & RADEON_IS_IGP)
6318                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6319         else
6320                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6321
6322         return 0;
6323 }
6324
6325 /**
6326  * cik_irq_ack - ack interrupt sources
6327  *
6328  * @rdev: radeon_device pointer
6329  *
6330  * Ack interrupt sources on the GPU (vblanks, hpd,
6331  * etc.) (CIK).  Certain interrupts sources are sw
6332  * generated and do not require an explicit ack.
6333  */
6334 static inline void cik_irq_ack(struct radeon_device *rdev)
6335 {
6336         u32 tmp;
6337
6338         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6339         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6340         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6341         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6342         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6343         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6344         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6345
6346         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6347                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6348         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6349                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6350         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6351                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6352         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6353                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6354
6355         if (rdev->num_crtc >= 4) {
6356                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6357                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6358                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6359                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6360                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6361                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6362                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6363                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6364         }
6365
6366         if (rdev->num_crtc >= 6) {
6367                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6368                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6369                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6370                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6371                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6372                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6373                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6374                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6375         }
6376
6377         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6378                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6379                 tmp |= DC_HPDx_INT_ACK;
6380                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6381         }
6382         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6383                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6384                 tmp |= DC_HPDx_INT_ACK;
6385                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6386         }
6387         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6388                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6389                 tmp |= DC_HPDx_INT_ACK;
6390                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6391         }
6392         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6393                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6394                 tmp |= DC_HPDx_INT_ACK;
6395                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6396         }
6397         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6398                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6399                 tmp |= DC_HPDx_INT_ACK;
6400                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6401         }
6402         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6403                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6404                 tmp |= DC_HPDx_INT_ACK;
6405                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6406         }
6407 }
6408
6409 /**
6410  * cik_irq_disable - disable interrupts
6411  *
6412  * @rdev: radeon_device pointer
6413  *
6414  * Disable interrupts on the hw (CIK).
6415  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* turn off the master enable and the IH ring buffer first */
	cik_disable_interrupts(rdev);
	/* Wait for any in-flight interrupt to land, then acknowledge it */
	mdelay(1);
	cik_irq_ack(rdev);
	/* finally clear every per-source interrupt enable bit */
	cik_disable_interrupt_state(rdev);
}
6424
6425 /**
 * cik_irq_suspend - disable interrupts for suspend
6427  *
6428  * @rdev: radeon_device pointer
6429  *
6430  * Disable interrupts and stop the RLC (CIK).
6431  * Used for suspend.
6432  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack all interrupt sources... */
	cik_irq_disable(rdev);
	/* ...then stop the RLC for suspend */
	cik_rlc_stop(rdev);
}
6438
6439 /**
6440  * cik_irq_fini - tear down interrupt support
6441  *
6442  * @rdev: radeon_device pointer
6443  *
6444  * Disable interrupts on the hw and free the IH ring
6445  * buffer (CIK).
6446  * Used for driver unload.
6447  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (disable irqs, stop the RLC)... */
	cik_irq_suspend(rdev);
	/* ...then free the IH ring buffer */
	r600_ih_ring_fini(rdev);
}
6453
6454 /**
6455  * cik_get_ih_wptr - get the IH ring buffer wptr
6456  *
6457  * @rdev: radeon_device pointer
6458  *
6459  * Get the IH ring buffer wptr from either the register
6460  * or the writeback memory buffer (CIK).  Also check for
6461  * ring buffer overflow and deal with it.
6462  * Used by cik_irq_process().
6463  * Returns the value of the wptr.
6464  */
6465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6466 {
6467         u32 wptr, tmp;
6468
6469         if (rdev->wb.enabled)
6470                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6471         else
6472                 wptr = RREG32(IH_RB_WPTR);
6473
6474         if (wptr & RB_OVERFLOW) {
6475                 /* When a ring buffer overflow happen start parsing interrupt
6476                  * from the last not overwritten vector (wptr + 16). Hopefully
6477                  * this should allow us to catchup.
6478                  */
6479                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6480                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6481                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6482                 tmp = RREG32(IH_RB_CNTL);
6483                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6484                 WREG32(IH_RB_CNTL, tmp);
6485         }
6486         return (wptr & rdev->ih.ptr_mask);
6487 }
6488
6489 /*        CIK IV Ring
6490  * Each IV ring entry is 128 bits:
6491  * [7:0]    - interrupt source id
6492  * [31:8]   - reserved
6493  * [59:32]  - interrupt source data
6494  * [63:60]  - reserved
6495  * [71:64]  - RINGID
6496  *            CP:
6497  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6498  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6499  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6500  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6501  *            PIPE_ID - ME0 0=3D
6502  *                    - ME1&2 compute dispatcher (4 pipes each)
6503  *            SDMA:
6504  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6505  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6506  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6507  * [79:72]  - VMID
6508  * [95:80]  - PASID
6509  * [127:96] - reserved
6510  */
6511 /**
6512  * cik_irq_process - interrupt handler
6513  *
6514  * @rdev: radeon_device pointer
6515  *
 * Interrupt handler (CIK).  Walk the IH ring,
6517  * ack interrupts and schedule work to handle
6518  * interrupt events.
6519  * Returns irq process return code.
6520  */
6521 int cik_irq_process(struct radeon_device *rdev)
6522 {
6523         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6524         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6525         u32 wptr;
6526         u32 rptr;
6527         u32 src_id, src_data, ring_id;
6528         u8 me_id, pipe_id, queue_id;
6529         u32 ring_index;
6530         bool queue_hotplug = false;
6531         bool queue_reset = false;
6532         u32 addr, status, mc_client;
6533         bool queue_thermal = false;
6534
6535         if (!rdev->ih.enabled || rdev->shutdown)
6536                 return IRQ_NONE;
6537
6538         wptr = cik_get_ih_wptr(rdev);
6539
6540 restart_ih:
6541         /* is somebody else already processing irqs? */
6542         if (atomic_xchg(&rdev->ih.lock, 1))
6543                 return IRQ_NONE;
6544
6545         rptr = rdev->ih.rptr;
6546         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6547
6548         /* Order reading of wptr vs. reading of IH ring data */
6549         rmb();
6550
6551         /* display interrupts */
6552         cik_irq_ack(rdev);
6553
6554         while (rptr != wptr) {
6555                 /* wptr/rptr are in bytes! */
6556                 ring_index = rptr / 4;
6557                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6558                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6559                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6560
6561                 switch (src_id) {
6562                 case 1: /* D1 vblank/vline */
6563                         switch (src_data) {
6564                         case 0: /* D1 vblank */
6565                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6566                                         if (rdev->irq.crtc_vblank_int[0]) {
6567                                                 drm_handle_vblank(rdev->ddev, 0);
6568                                                 rdev->pm.vblank_sync = true;
6569                                                 wake_up(&rdev->irq.vblank_queue);
6570                                         }
6571                                         if (atomic_read(&rdev->irq.pflip[0]))
6572                                                 radeon_crtc_handle_flip(rdev, 0);
6573                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6574                                         DRM_DEBUG("IH: D1 vblank\n");
6575                                 }
6576                                 break;
6577                         case 1: /* D1 vline */
6578                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6579                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6580                                         DRM_DEBUG("IH: D1 vline\n");
6581                                 }
6582                                 break;
6583                         default:
6584                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6585                                 break;
6586                         }
6587                         break;
6588                 case 2: /* D2 vblank/vline */
6589                         switch (src_data) {
6590                         case 0: /* D2 vblank */
6591                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6592                                         if (rdev->irq.crtc_vblank_int[1]) {
6593                                                 drm_handle_vblank(rdev->ddev, 1);
6594                                                 rdev->pm.vblank_sync = true;
6595                                                 wake_up(&rdev->irq.vblank_queue);
6596                                         }
6597                                         if (atomic_read(&rdev->irq.pflip[1]))
6598                                                 radeon_crtc_handle_flip(rdev, 1);
6599                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6600                                         DRM_DEBUG("IH: D2 vblank\n");
6601                                 }
6602                                 break;
6603                         case 1: /* D2 vline */
6604                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6605                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6606                                         DRM_DEBUG("IH: D2 vline\n");
6607                                 }
6608                                 break;
6609                         default:
6610                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6611                                 break;
6612                         }
6613                         break;
6614                 case 3: /* D3 vblank/vline */
6615                         switch (src_data) {
6616                         case 0: /* D3 vblank */
6617                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6618                                         if (rdev->irq.crtc_vblank_int[2]) {
6619                                                 drm_handle_vblank(rdev->ddev, 2);
6620                                                 rdev->pm.vblank_sync = true;
6621                                                 wake_up(&rdev->irq.vblank_queue);
6622                                         }
6623                                         if (atomic_read(&rdev->irq.pflip[2]))
6624                                                 radeon_crtc_handle_flip(rdev, 2);
6625                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6626                                         DRM_DEBUG("IH: D3 vblank\n");
6627                                 }
6628                                 break;
6629                         case 1: /* D3 vline */
6630                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6631                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6632                                         DRM_DEBUG("IH: D3 vline\n");
6633                                 }
6634                                 break;
6635                         default:
6636                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6637                                 break;
6638                         }
6639                         break;
6640                 case 4: /* D4 vblank/vline */
6641                         switch (src_data) {
6642                         case 0: /* D4 vblank */
6643                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6644                                         if (rdev->irq.crtc_vblank_int[3]) {
6645                                                 drm_handle_vblank(rdev->ddev, 3);
6646                                                 rdev->pm.vblank_sync = true;
6647                                                 wake_up(&rdev->irq.vblank_queue);
6648                                         }
6649                                         if (atomic_read(&rdev->irq.pflip[3]))
6650                                                 radeon_crtc_handle_flip(rdev, 3);
6651                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6652                                         DRM_DEBUG("IH: D4 vblank\n");
6653                                 }
6654                                 break;
6655                         case 1: /* D4 vline */
6656                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6657                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6658                                         DRM_DEBUG("IH: D4 vline\n");
6659                                 }
6660                                 break;
6661                         default:
6662                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6663                                 break;
6664                         }
6665                         break;
6666                 case 5: /* D5 vblank/vline */
6667                         switch (src_data) {
6668                         case 0: /* D5 vblank */
6669                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6670                                         if (rdev->irq.crtc_vblank_int[4]) {
6671                                                 drm_handle_vblank(rdev->ddev, 4);
6672                                                 rdev->pm.vblank_sync = true;
6673                                                 wake_up(&rdev->irq.vblank_queue);
6674                                         }
6675                                         if (atomic_read(&rdev->irq.pflip[4]))
6676                                                 radeon_crtc_handle_flip(rdev, 4);
6677                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6678                                         DRM_DEBUG("IH: D5 vblank\n");
6679                                 }
6680                                 break;
6681                         case 1: /* D5 vline */
6682                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6683                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6684                                         DRM_DEBUG("IH: D5 vline\n");
6685                                 }
6686                                 break;
6687                         default:
6688                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6689                                 break;
6690                         }
6691                         break;
6692                 case 6: /* D6 vblank/vline */
6693                         switch (src_data) {
6694                         case 0: /* D6 vblank */
6695                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6696                                         if (rdev->irq.crtc_vblank_int[5]) {
6697                                                 drm_handle_vblank(rdev->ddev, 5);
6698                                                 rdev->pm.vblank_sync = true;
6699                                                 wake_up(&rdev->irq.vblank_queue);
6700                                         }
6701                                         if (atomic_read(&rdev->irq.pflip[5]))
6702                                                 radeon_crtc_handle_flip(rdev, 5);
6703                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6704                                         DRM_DEBUG("IH: D6 vblank\n");
6705                                 }
6706                                 break;
6707                         case 1: /* D6 vline */
6708                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6709                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6710                                         DRM_DEBUG("IH: D6 vline\n");
6711                                 }
6712                                 break;
6713                         default:
6714                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6715                                 break;
6716                         }
6717                         break;
6718                 case 42: /* HPD hotplug */
6719                         switch (src_data) {
6720                         case 0:
6721                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6722                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6723                                         queue_hotplug = true;
6724                                         DRM_DEBUG("IH: HPD1\n");
6725                                 }
6726                                 break;
6727                         case 1:
6728                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6729                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6730                                         queue_hotplug = true;
6731                                         DRM_DEBUG("IH: HPD2\n");
6732                                 }
6733                                 break;
6734                         case 2:
6735                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6736                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6737                                         queue_hotplug = true;
6738                                         DRM_DEBUG("IH: HPD3\n");
6739                                 }
6740                                 break;
6741                         case 3:
6742                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6743                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6744                                         queue_hotplug = true;
6745                                         DRM_DEBUG("IH: HPD4\n");
6746                                 }
6747                                 break;
6748                         case 4:
6749                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6750                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6751                                         queue_hotplug = true;
6752                                         DRM_DEBUG("IH: HPD5\n");
6753                                 }
6754                                 break;
6755                         case 5:
6756                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6757                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6758                                         queue_hotplug = true;
6759                                         DRM_DEBUG("IH: HPD6\n");
6760                                 }
6761                                 break;
6762                         default:
6763                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6764                                 break;
6765                         }
6766                         break;
6767                 case 124: /* UVD */
6768                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6769                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6770                         break;
6771                 case 146:
6772                 case 147:
6773                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6774                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6775                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6776                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6777                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6778                                 addr);
6779                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6780                                 status);
6781                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6782                         /* reset addr and status */
6783                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6784                         break;
6785                 case 176: /* GFX RB CP_INT */
6786                 case 177: /* GFX IB CP_INT */
6787                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6788                         break;
6789                 case 181: /* CP EOP event */
6790                         DRM_DEBUG("IH: CP EOP\n");
6791                         /* XXX check the bitfield order! */
6792                         me_id = (ring_id & 0x60) >> 5;
6793                         pipe_id = (ring_id & 0x18) >> 3;
6794                         queue_id = (ring_id & 0x7) >> 0;
6795                         switch (me_id) {
6796                         case 0:
6797                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6798                                 break;
6799                         case 1:
6800                         case 2:
6801                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6802                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6803                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6804                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6805                                 break;
6806                         }
6807                         break;
6808                 case 184: /* CP Privileged reg access */
6809                         DRM_ERROR("Illegal register access in command stream\n");
6810                         /* XXX check the bitfield order! */
6811                         me_id = (ring_id & 0x60) >> 5;
6812                         pipe_id = (ring_id & 0x18) >> 3;
6813                         queue_id = (ring_id & 0x7) >> 0;
6814                         switch (me_id) {
6815                         case 0:
6816                                 /* This results in a full GPU reset, but all we need to do is soft
6817                                  * reset the CP for gfx
6818                                  */
6819                                 queue_reset = true;
6820                                 break;
6821                         case 1:
6822                                 /* XXX compute */
6823                                 queue_reset = true;
6824                                 break;
6825                         case 2:
6826                                 /* XXX compute */
6827                                 queue_reset = true;
6828                                 break;
6829                         }
6830                         break;
6831                 case 185: /* CP Privileged inst */
6832                         DRM_ERROR("Illegal instruction in command stream\n");
6833                         /* XXX check the bitfield order! */
6834                         me_id = (ring_id & 0x60) >> 5;
6835                         pipe_id = (ring_id & 0x18) >> 3;
6836                         queue_id = (ring_id & 0x7) >> 0;
6837                         switch (me_id) {
6838                         case 0:
6839                                 /* This results in a full GPU reset, but all we need to do is soft
6840                                  * reset the CP for gfx
6841                                  */
6842                                 queue_reset = true;
6843                                 break;
6844                         case 1:
6845                                 /* XXX compute */
6846                                 queue_reset = true;
6847                                 break;
6848                         case 2:
6849                                 /* XXX compute */
6850                                 queue_reset = true;
6851                                 break;
6852                         }
6853                         break;
6854                 case 224: /* SDMA trap event */
6855                         /* XXX check the bitfield order! */
6856                         me_id = (ring_id & 0x3) >> 0;
6857                         queue_id = (ring_id & 0xc) >> 2;
6858                         DRM_DEBUG("IH: SDMA trap\n");
6859                         switch (me_id) {
6860                         case 0:
6861                                 switch (queue_id) {
6862                                 case 0:
6863                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6864                                         break;
6865                                 case 1:
6866                                         /* XXX compute */
6867                                         break;
6868                                 case 2:
6869                                         /* XXX compute */
6870                                         break;
6871                                 }
6872                                 break;
6873                         case 1:
6874                                 switch (queue_id) {
6875                                 case 0:
6876                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6877                                         break;
6878                                 case 1:
6879                                         /* XXX compute */
6880                                         break;
6881                                 case 2:
6882                                         /* XXX compute */
6883                                         break;
6884                                 }
6885                                 break;
6886                         }
6887                         break;
6888                 case 230: /* thermal low to high */
6889                         DRM_DEBUG("IH: thermal low to high\n");
6890                         rdev->pm.dpm.thermal.high_to_low = false;
6891                         queue_thermal = true;
6892                         break;
6893                 case 231: /* thermal high to low */
6894                         DRM_DEBUG("IH: thermal high to low\n");
6895                         rdev->pm.dpm.thermal.high_to_low = true;
6896                         queue_thermal = true;
6897                         break;
6898                 case 233: /* GUI IDLE */
6899                         DRM_DEBUG("IH: GUI idle\n");
6900                         break;
6901                 case 241: /* SDMA Privileged inst */
6902                 case 247: /* SDMA Privileged inst */
6903                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6904                         /* XXX check the bitfield order! */
6905                         me_id = (ring_id & 0x3) >> 0;
6906                         queue_id = (ring_id & 0xc) >> 2;
6907                         switch (me_id) {
6908                         case 0:
6909                                 switch (queue_id) {
6910                                 case 0:
6911                                         queue_reset = true;
6912                                         break;
6913                                 case 1:
6914                                         /* XXX compute */
6915                                         queue_reset = true;
6916                                         break;
6917                                 case 2:
6918                                         /* XXX compute */
6919                                         queue_reset = true;
6920                                         break;
6921                                 }
6922                                 break;
6923                         case 1:
6924                                 switch (queue_id) {
6925                                 case 0:
6926                                         queue_reset = true;
6927                                         break;
6928                                 case 1:
6929                                         /* XXX compute */
6930                                         queue_reset = true;
6931                                         break;
6932                                 case 2:
6933                                         /* XXX compute */
6934                                         queue_reset = true;
6935                                         break;
6936                                 }
6937                                 break;
6938                         }
6939                         break;
6940                 default:
6941                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6942                         break;
6943                 }
6944
6945                 /* wptr/rptr are in bytes! */
6946                 rptr += 16;
6947                 rptr &= rdev->ih.ptr_mask;
6948         }
6949         if (queue_hotplug)
6950                 schedule_work(&rdev->hotplug_work);
6951         if (queue_reset)
6952                 schedule_work(&rdev->reset_work);
6953         if (queue_thermal)
6954                 schedule_work(&rdev->pm.dpm.thermal.work);
6955         rdev->ih.rptr = rptr;
6956         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6957         atomic_set(&rdev->ih.lock, 0);
6958
6959         /* make sure wptr hasn't changed while processing */
6960         wptr = cik_get_ih_wptr(rdev);
6961         if (wptr != rptr)
6962                 goto restart_ih;
6963
6964         return IRQ_HANDLED;
6965 }
6966
6967 /*
6968  * startup/shutdown callbacks
6969  */
6970 /**
6971  * cik_startup - program the asic to a functional state
6972  *
6973  * @rdev: radeon_device pointer
6974  *
6975  * Programs the asic to a functional state (CIK).
6976  * Called by cik_init() and cik_resume().
6977  * Returns 0 for success, error for failure.
6978  */
6979 static int cik_startup(struct radeon_device *rdev)
6980 {
6981         struct radeon_ring *ring;
6982         int r;
6983
6984         /* enable pcie gen2/3 link */
6985         cik_pcie_gen3_enable(rdev);
6986         /* enable aspm */
6987         cik_program_aspm(rdev);
6988
6989         /* scratch needs to be initialized before MC */
6990         r = r600_vram_scratch_init(rdev);
6991         if (r)
6992                 return r;
6993
6994         cik_mc_program(rdev);
6995
6996         if (rdev->flags & RADEON_IS_IGP) {
6997                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6998                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6999                         r = cik_init_microcode(rdev);
7000                         if (r) {
7001                                 DRM_ERROR("Failed to load firmware!\n");
7002                                 return r;
7003                         }
7004                 }
7005         } else {
7006                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7007                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7008                     !rdev->mc_fw) {
7009                         r = cik_init_microcode(rdev);
7010                         if (r) {
7011                                 DRM_ERROR("Failed to load firmware!\n");
7012                                 return r;
7013                         }
7014                 }
7015
7016                 r = ci_mc_load_microcode(rdev);
7017                 if (r) {
7018                         DRM_ERROR("Failed to load MC firmware!\n");
7019                         return r;
7020                 }
7021         }
7022
7023         r = cik_pcie_gart_enable(rdev);
7024         if (r)
7025                 return r;
7026         cik_gpu_init(rdev);
7027
7028         /* allocate rlc buffers */
7029         if (rdev->flags & RADEON_IS_IGP) {
7030                 if (rdev->family == CHIP_KAVERI) {
7031                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7032                         rdev->rlc.reg_list_size =
7033                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7034                 } else {
7035                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7036                         rdev->rlc.reg_list_size =
7037                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7038                 }
7039         }
7040         rdev->rlc.cs_data = ci_cs_data;
7041         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7042         r = sumo_rlc_init(rdev);
7043         if (r) {
7044                 DRM_ERROR("Failed to init rlc BOs!\n");
7045                 return r;
7046         }
7047
7048         /* allocate wb buffer */
7049         r = radeon_wb_init(rdev);
7050         if (r)
7051                 return r;
7052
7053         /* allocate mec buffers */
7054         r = cik_mec_init(rdev);
7055         if (r) {
7056                 DRM_ERROR("Failed to init MEC BOs!\n");
7057                 return r;
7058         }
7059
7060         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7061         if (r) {
7062                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7063                 return r;
7064         }
7065
7066         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7067         if (r) {
7068                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7069                 return r;
7070         }
7071
7072         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7073         if (r) {
7074                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7075                 return r;
7076         }
7077
7078         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7079         if (r) {
7080                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7081                 return r;
7082         }
7083
7084         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7085         if (r) {
7086                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7087                 return r;
7088         }
7089
7090         r = radeon_uvd_resume(rdev);
7091         if (!r) {
7092                 r = uvd_v4_2_resume(rdev);
7093                 if (!r) {
7094                         r = radeon_fence_driver_start_ring(rdev,
7095                                                            R600_RING_TYPE_UVD_INDEX);
7096                         if (r)
7097                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7098                 }
7099         }
7100         if (r)
7101                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7102
7103         /* Enable IRQ */
7104         if (!rdev->irq.installed) {
7105                 r = radeon_irq_kms_init(rdev);
7106                 if (r)
7107                         return r;
7108         }
7109
7110         r = cik_irq_init(rdev);
7111         if (r) {
7112                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113                 radeon_irq_kms_fini(rdev);
7114                 return r;
7115         }
7116         cik_irq_set(rdev);
7117
7118         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120                              CP_RB0_RPTR, CP_RB0_WPTR,
7121                              RADEON_CP_PACKET2);
7122         if (r)
7123                 return r;
7124
7125         /* set up the compute queues */
7126         /* type-2 packets are deprecated on MEC, use type-3 instead */
7127         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7128         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7129                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7130                              PACKET3(PACKET3_NOP, 0x3FFF));
7131         if (r)
7132                 return r;
7133         ring->me = 1; /* first MEC */
7134         ring->pipe = 0; /* first pipe */
7135         ring->queue = 0; /* first queue */
7136         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7137
7138         /* type-2 packets are deprecated on MEC, use type-3 instead */
7139         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7140         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7141                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7142                              PACKET3(PACKET3_NOP, 0x3FFF));
7143         if (r)
7144                 return r;
7145         /* dGPU only have 1 MEC */
7146         ring->me = 1; /* first MEC */
7147         ring->pipe = 0; /* first pipe */
7148         ring->queue = 1; /* second queue */
7149         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7150
7151         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7152         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7153                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7154                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7155                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7156         if (r)
7157                 return r;
7158
7159         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7160         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7161                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7162                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7163                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7164         if (r)
7165                 return r;
7166
7167         r = cik_cp_resume(rdev);
7168         if (r)
7169                 return r;
7170
7171         r = cik_sdma_resume(rdev);
7172         if (r)
7173                 return r;
7174
7175         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7176         if (ring->ring_size) {
7177                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7178                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7179                                      RADEON_CP_PACKET2);
7180                 if (!r)
7181                         r = uvd_v1_0_init(rdev);
7182                 if (r)
7183                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7184         }
7185
7186         r = radeon_ib_pool_init(rdev);
7187         if (r) {
7188                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7189                 return r;
7190         }
7191
7192         r = radeon_vm_manager_init(rdev);
7193         if (r) {
7194                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7195                 return r;
7196         }
7197
7198         r = dce6_audio_init(rdev);
7199         if (r)
7200                 return r;
7201
7202         return 0;
7203 }
7204
7205 /**
7206  * cik_resume - resume the asic to a functional state
7207  *
7208  * @rdev: radeon_device pointer
7209  *
7210  * Programs the asic to a functional state (CIK).
7211  * Called at resume.
7212  * Returns 0 for success, error for failure.
7213  */
7214 int cik_resume(struct radeon_device *rdev)
7215 {
7216         int r;
7217
7218         /* post card */
7219         atom_asic_init(rdev->mode_info.atom_context);
7220
7221         /* init golden registers */
7222         cik_init_golden_registers(rdev);
7223
7224         rdev->accel_working = true;
7225         r = cik_startup(rdev);
7226         if (r) {
7227                 DRM_ERROR("cik startup failed on resume\n");
7228                 rdev->accel_working = false;
7229                 return r;
7230         }
7231
7232         return r;
7233
7234 }
7235
7236 /**
7237  * cik_suspend - suspend the asic
7238  *
7239  * @rdev: radeon_device pointer
7240  *
7241  * Bring the chip into a state suitable for suspend (CIK).
7242  * Called at suspend.
7243  * Returns 0 for success.
7244  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Quiesce the chip for suspend.  The ordering below matters:
	 * the engines are halted first, then the gating/irq/writeback
	 * state is torn down, and the GART goes away last.
	 */
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* halt the CP */
	cik_sdma_enable(rdev, false);	/* halt the SDMA engines */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_fini_pg(rdev);	/* disable powergating (pg) */
	cik_fini_cg(rdev);	/* disable clockgating (cg) */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7260
/* Plan is to move initialization into that function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
7267 /**
7268  * cik_init - asic specific driver and hw init
7269  *
7270  * @rdev: radeon_device pointer
7271  *
7272  * Setup asic specific driver variables and program the hw
7273  * to a functional state (CIK).
7274  * Called at driver startup.
7275  * Returns 0 for success, errors for failure.
7276  */
7277 int cik_init(struct radeon_device *rdev)
7278 {
7279         struct radeon_ring *ring;
7280         int r;
7281
7282         /* Read BIOS */
7283         if (!radeon_get_bios(rdev)) {
7284                 if (ASIC_IS_AVIVO(rdev))
7285                         return -EINVAL;
7286         }
7287         /* Must be an ATOMBIOS */
7288         if (!rdev->is_atom_bios) {
7289                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7290                 return -EINVAL;
7291         }
7292         r = radeon_atombios_init(rdev);
7293         if (r)
7294                 return r;
7295
7296         /* Post card if necessary */
7297         if (!radeon_card_posted(rdev)) {
7298                 if (!rdev->bios) {
7299                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7300                         return -EINVAL;
7301                 }
7302                 DRM_INFO("GPU not posted. posting now...\n");
7303                 atom_asic_init(rdev->mode_info.atom_context);
7304         }
7305         /* init golden registers */
7306         cik_init_golden_registers(rdev);
7307         /* Initialize scratch registers */
7308         cik_scratch_init(rdev);
7309         /* Initialize surface registers */
7310         radeon_surface_init(rdev);
7311         /* Initialize clocks */
7312         radeon_get_clock_info(rdev->ddev);
7313
7314         /* Fence driver */
7315         r = radeon_fence_driver_init(rdev);
7316         if (r)
7317                 return r;
7318
7319         /* initialize memory controller */
7320         r = cik_mc_init(rdev);
7321         if (r)
7322                 return r;
7323         /* Memory manager */
7324         r = radeon_bo_init(rdev);
7325         if (r)
7326                 return r;
7327
7328         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7329         ring->ring_obj = NULL;
7330         r600_ring_init(rdev, ring, 1024 * 1024);
7331
7332         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7333         ring->ring_obj = NULL;
7334         r600_ring_init(rdev, ring, 1024 * 1024);
7335         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7336         if (r)
7337                 return r;
7338
7339         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7340         ring->ring_obj = NULL;
7341         r600_ring_init(rdev, ring, 1024 * 1024);
7342         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7343         if (r)
7344                 return r;
7345
7346         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7347         ring->ring_obj = NULL;
7348         r600_ring_init(rdev, ring, 256 * 1024);
7349
7350         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7351         ring->ring_obj = NULL;
7352         r600_ring_init(rdev, ring, 256 * 1024);
7353
7354         r = radeon_uvd_init(rdev);
7355         if (!r) {
7356                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7357                 ring->ring_obj = NULL;
7358                 r600_ring_init(rdev, ring, 4096);
7359         }
7360
7361         rdev->ih.ring_obj = NULL;
7362         r600_ih_ring_init(rdev, 64 * 1024);
7363
7364         r = r600_pcie_gart_init(rdev);
7365         if (r)
7366                 return r;
7367
7368         rdev->accel_working = true;
7369         r = cik_startup(rdev);
7370         if (r) {
7371                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7372                 cik_cp_fini(rdev);
7373                 cik_sdma_fini(rdev);
7374                 cik_irq_fini(rdev);
7375                 sumo_rlc_fini(rdev);
7376                 cik_mec_fini(rdev);
7377                 radeon_wb_fini(rdev);
7378                 radeon_ib_pool_fini(rdev);
7379                 radeon_vm_manager_fini(rdev);
7380                 radeon_irq_kms_fini(rdev);
7381                 cik_pcie_gart_fini(rdev);
7382                 rdev->accel_working = false;
7383         }
7384
7385         /* Don't start up if the MC ucode is missing.
7386          * The default clocks and voltages before the MC ucode
7387          * is loaded are not suffient for advanced operations.
7388          */
7389         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7390                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7391                 return -EINVAL;
7392         }
7393
7394         return 0;
7395 }
7396
7397 /**
7398  * cik_fini - asic specific driver and hw fini
7399  *
7400  * @rdev: radeon_device pointer
7401  *
7402  * Tear down the asic specific driver variables and program the hw
7403  * to an idle state (CIK).
7404  * Called at driver unload.
7405  */
void cik_fini(struct radeon_device *rdev)
{
	/* Full teardown at driver unload.  Engines are stopped first,
	 * then IRQ/RLC/MEC/writeback/VM/IB state is released, and the
	 * memory manager, fence driver and BIOS copy go last.
	 */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);	/* disable powergating (pg) */
	cik_fini_cg(rdev);	/* disable clockgating (cg) */
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* guard against stale pointer after free */
}
7430
7431 /* display watermark setup */
7432 /**
7433  * dce8_line_buffer_adjust - Set up the line buffer
7434  *
7435  * @rdev: radeon_device pointer
7436  * @radeon_crtc: the selected display controller
7437  * @mode: the current display mode on the selected display
7438  * controller
7439  *
 * Set up the line buffer allocation for
7441  * the selected display controller (CIK).
7442  * Returns the line buffer size in pixels.
7443  */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		/* tmp selects the LB_MEMORY_CONFIG partitioning,
		 * buffer_alloc the number of DMIF buffers.
		 */
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw acks the DMIF allocation, bounded by
	 * usec_timeout; falls through silently on timeout.
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* translate the partition selection back to a size in pixels */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
7505
7506 /**
7507  * cik_get_number_of_dram_channels - get the number of dram channels
7508  *
7509  * @rdev: radeon_device pointer
7510  *
7511  * Look up the number of video ram channels (CIK).
7512  * Used for display watermark bandwidth calculations
7513  * Returns the number of dram channels
7514  */
7515 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7516 {
7517         u32 tmp = RREG32(MC_SHARED_CHMAP);
7518
7519         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7520         case 0:
7521         default:
7522                 return 1;
7523         case 1:
7524                 return 2;
7525         case 2:
7526                 return 4;
7527         case 3:
7528                 return 8;
7529         case 4:
7530                 return 3;
7531         case 5:
7532                 return 6;
7533         case 6:
7534                 return 10;
7535         case 7:
7536                 return 12;
7537         case 8:
7538                 return 16;
7539         }
7540 }
7541
/* Input parameters for the DCE8 display watermark calculations
 * (dce8_*_bandwidth() / dce8_latency_watermark() below).
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7557
7558 /**
7559  * dce8_dram_bandwidth - get the dram bandwidth
7560  *
7561  * @wm: watermark calculation data
7562  *
7563  * Calculate the raw dram bandwidth (CIK).
7564  * Used for display watermark bandwidth calculations
7565  * Returns the dram bandwidth in MBytes/s
7566  */
7567 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7568 {
7569         /* Calculate raw DRAM Bandwidth */
7570         fixed20_12 dram_efficiency; /* 0.7 */
7571         fixed20_12 yclk, dram_channels, bandwidth;
7572         fixed20_12 a;
7573
7574         a.full = dfixed_const(1000);
7575         yclk.full = dfixed_const(wm->yclk);
7576         yclk.full = dfixed_div(yclk, a);
7577         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7578         a.full = dfixed_const(10);
7579         dram_efficiency.full = dfixed_const(7);
7580         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7581         bandwidth.full = dfixed_mul(dram_channels, yclk);
7582         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7583
7584         return dfixed_trunc(bandwidth);
7585 }
7586
7587 /**
7588  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7589  *
7590  * @wm: watermark calculation data
7591  *
7592  * Calculate the dram bandwidth used for display (CIK).
7593  * Used for display watermark bandwidth calculations
7594  * Returns the dram bandwidth for display in MBytes/s
7595  */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	/* yclk kHz -> MHz */
	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	/* 4 bytes per channel per clock */
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	/* display share = channels * 4 * yclk * 0.3 */
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
7615
7616 /**
7617  * dce8_data_return_bandwidth - get the data return bandwidth
7618  *
7619  * @wm: watermark calculation data
7620  *
7621  * Calculate the data return bandwidth used for display (CIK).
7622  * Used for display watermark bandwidth calculations
7623  * Returns the data return bandwidth in MBytes/s
7624  */
7625 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7626 {
7627         /* Calculate the display Data return Bandwidth */
7628         fixed20_12 return_efficiency; /* 0.8 */
7629         fixed20_12 sclk, bandwidth;
7630         fixed20_12 a;
7631
7632         a.full = dfixed_const(1000);
7633         sclk.full = dfixed_const(wm->sclk);
7634         sclk.full = dfixed_div(sclk, a);
7635         a.full = dfixed_const(10);
7636         return_efficiency.full = dfixed_const(8);
7637         return_efficiency.full = dfixed_div(return_efficiency, a);
7638         a.full = dfixed_const(32);
7639         bandwidth.full = dfixed_mul(a, sclk);
7640         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7641
7642         return dfixed_trunc(bandwidth);
7643 }
7644
7645 /**
7646  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7647  *
7648  * @wm: watermark calculation data
7649  *
7650  * Calculate the dmif bandwidth used for display (CIK).
7651  * Used for display watermark bandwidth calculations
7652  * Returns the dmif bandwidth in MBytes/s
7653  */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	/* disp_clk kHz -> MHz */
	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	/* 32 bytes per display clock */
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	/* request efficiency = 8 / 10 = 0.8 */
	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
7675
7676 /**
7677  * dce8_available_bandwidth - get the min available bandwidth
7678  *
7679  * @wm: watermark calculation data
7680  *
7681  * Calculate the min available bandwidth used for display (CIK).
7682  * Used for display watermark bandwidth calculations
7683  * Returns the min available bandwidth in MBytes/s
7684  */
7685 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7686 {
7687         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7688         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7689         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7690         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7691
7692         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7693 }
7694
7695 /**
7696  * dce8_average_bandwidth - get the average available bandwidth
7697  *
7698  * @wm: watermark calculation data
7699  *
7700  * Calculate the average available bandwidth used for display (CIK).
7701  * Used for display watermark bandwidth calculations
7702  * Returns the average available bandwidth in MBytes/s
7703  */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	/* line time (active + blank) ns -> us */
	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	/* bytes per line, scaled by the vertical scale ratio,
	 * spread over the line time
	 */
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
7727
7728 /**
7729  * dce8_latency_watermark - get the latency watermark
7730  *
7731  * @wm: watermark calculation data
7732  *
7733  * Calculate the latency watermark (CIK).
7734  * Used for display watermark bandwidth calculations
7735  * Returns the latency watermark in ns
7736  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): assumes available_bandwidth and disp_clk are
	 * non-zero here; the divisions below would otherwise fault.
	 */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing contends for the data return path */
	if (wm->num_heads == 0)
		return 0;

	/* more source lines are needed per output line when
	 * downscaling (vsc > 1..2) or with >= 3 vertical taps
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif-size-limited fill rate */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* disp-clk-limited fill rate (disp_clk MHz * bytes_per_pixel) */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the source lines for one output line at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can be filled within the active time, latency alone
	 * bounds the watermark; otherwise add the overrun
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7799
7800 /**
7801  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7802  * average and available dram bandwidth
7803  *
7804  * @wm: watermark calculation data
7805  *
7806  * Check if the display average bandwidth fits in the display
7807  * dram bandwidth (CIK).
7808  * Used for display watermark bandwidth calculations
7809  * Returns true if the display fits, false if not.
7810  */
7811 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7812 {
7813         if (dce8_average_bandwidth(wm) <=
7814             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7815                 return true;
7816         else
7817                 return false;
7818 }
7819
7820 /**
7821  * dce8_average_bandwidth_vs_available_bandwidth - check
7822  * average and available bandwidth
7823  *
7824  * @wm: watermark calculation data
7825  *
7826  * Check if the display average bandwidth fits in the display
7827  * available bandwidth (CIK).
7828  * Used for display watermark bandwidth calculations
7829  * Returns true if the display fits, false if not.
7830  */
7831 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7832 {
7833         if (dce8_average_bandwidth(wm) <=
7834             (dce8_available_bandwidth(wm) / wm->num_heads))
7835                 return true;
7836         else
7837                 return false;
7838 }
7839
7840 /**
7841  * dce8_check_latency_hiding - check latency hiding
7842  *
7843  * @wm: watermark calculation data
7844  *
7845  * Check latency hiding (CIK).
7846  * Used for display watermark bandwidth calculations
7847  * Returns true if the display fits, false if not.
7848  */
7849 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7850 {
7851         u32 lb_partitions = wm->lb_size / wm->src_width;
7852         u32 line_time = wm->active_time + wm->blank_time;
7853         u32 latency_tolerant_lines;
7854         u32 latency_hiding;
7855         fixed20_12 a;
7856
7857         a.full = dfixed_const(1);
7858         if (wm->vsc.full > a.full)
7859                 latency_tolerant_lines = 1;
7860         else {
7861                 if (lb_partitions <= (wm->vtaps + 1))
7862                         latency_tolerant_lines = 1;
7863                 else
7864                         latency_tolerant_lines = 2;
7865         }
7866
7867         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7868
7869         if (dce8_latency_watermark(wm) <= latency_hiding)
7870                 return true;
7871         else
7872                 return false;
7873 }
7874
7875 /**
7876  * dce8_program_watermarks - program display watermarks
7877  *
7878  * @rdev: radeon_device pointer
7879  * @radeon_crtc: the selected display controller
7880  * @lb_size: line buffer size
7881  * @num_heads: number of display controllers in use
7882  *
7883  * Calculate and program the display watermarks for the
7884  * selected display controller (CIK).
7885  */
static void dce8_program_watermarks(struct radeon_device *rdev,
                                    struct radeon_crtc *radeon_crtc,
                                    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped at 65535 for the register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm enabled: query the high (false = not low) levels */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			/* NOTE(review): only logs; no priority is actually
			 * forced here - confirm whether that is intended.
			 */
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm enabled: query the low (true) levels */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8007
8008 /**
8009  * dce8_bandwidth_update - program display watermarks
8010  *
8011  * @rdev: radeon_device pointer
8012  *
8013  * Calculate and program the display watermarks and line
8014  * buffer allocation (CIK).
8015  */
8016 void dce8_bandwidth_update(struct radeon_device *rdev)
8017 {
8018         struct drm_display_mode *mode = NULL;
8019         u32 num_heads = 0, lb_size;
8020         int i;
8021
8022         radeon_update_display_priority(rdev);
8023
8024         for (i = 0; i < rdev->num_crtc; i++) {
8025                 if (rdev->mode_info.crtcs[i]->base.enabled)
8026                         num_heads++;
8027         }
8028         for (i = 0; i < rdev->num_crtc; i++) {
8029                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8030                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8031                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8032         }
8033 }
8034
8035 /**
8036  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8037  *
8038  * @rdev: radeon_device pointer
8039  *
8040  * Fetches a GPU clock counter snapshot (SI).
8041  * Returns the 64 bit clock counter snapshot.
8042  */
8043 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8044 {
8045         uint64_t clock;
8046
8047         mutex_lock(&rdev->gpu_clock_mutex);
8048         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8049         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8050                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8051         mutex_unlock(&rdev->gpu_clock_mutex);
8052         return clock;
8053 }
8054
8055 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8056                               u32 cntl_reg, u32 status_reg)
8057 {
8058         int r, i;
8059         struct atom_clock_dividers dividers;
8060         uint32_t tmp;
8061
8062         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8063                                            clock, false, &dividers);
8064         if (r)
8065                 return r;
8066
8067         tmp = RREG32_SMC(cntl_reg);
8068         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8069         tmp |= dividers.post_divider;
8070         WREG32_SMC(cntl_reg, tmp);
8071
8072         for (i = 0; i < 100; i++) {
8073                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8074                         break;
8075                 mdelay(10);
8076         }
8077         if (i == 100)
8078                 return -ETIMEDOUT;
8079
8080         return 0;
8081 }
8082
8083 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8084 {
8085         int r = 0;
8086
8087         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8088         if (r)
8089                 return r;
8090
8091         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8092         return r;
8093 }
8094
/**
 * cik_pcie_gen3_enable - retrain the PCIE link at a higher speed
 *
 * @rdev: radeon_device pointer
 *
 * If the root port and the GPU both support faster link speeds than
 * the one currently trained, retrain the link at the fastest common
 * rate (gen3 preferred, then gen2).  Does nothing for IGPs, non-PCIE
 * boards, or when disabled with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter radeon.pcie_gen2=0 disables speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* integrated parts have no external PCIE link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* mask of link speeds supported by the platform */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		/* data rate 2 == link already trained at gen3 */
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		/* data rate 1 == link already trained at gen2 */
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIE capability in both config spaces; bail if
	 * either end lacks one */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends so the HAWD bits can be
			 * restored after each retry below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* set hardware autonomous width disable on both ends */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the maximum detected link width
			 * if we are currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot LNKCTL/LNKCTL2 on both ends before
				 * quiescing the link */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link ... */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				/* ... and request an equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved bit-4 and bits-9..11 fields
				 * of LNKCTL2 on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* select the target link speed in the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8251
/**
 * cik_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 idle power states, PLL power-down in L1, and
 * CLKREQ-based clocking, subject to the local disable flags.  Does
 * nothing for IGPs, non-PCIE boards, or when disabled with
 * radeon.aspm=0.
 *
 * Registers are written with a read-modify-write and only committed
 * when the value actually changed (the orig != data pattern).
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features enabled by default */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter radeon.aspm=0 disables all of this */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the advertised N_FTS value to 0x24 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity settings; PMI_TO_L1_DIS is set
	 * here and cleared again below when L1 is actually enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs (both pads, both halves) to
			 * power down in the OFF and TXS2 states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ is only usable if the root port
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow pad power down in L1/L23 and move the
				 * misc clocks off the reference clock */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the PCIE memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* clear the L0s inactivity timeout again when both
		 * directions of the link report lane reversal */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}