/* drivers/gpu/drm/radeon/cik.c */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
/*
 * Firmware images this code may load at runtime; declaring them with
 * MODULE_FIRMWARE() lets userspace tooling ship the blobs alongside
 * the module.
 */
/* Bonaire: full set, including MC and SMC images */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
/* Kaveri: no MC/SMC images (presumably an APU — confirm) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
/* Kabini: same blob set as Kaveri */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
/*
 * Helpers implemented in the r600/evergreen/si/cik_sdma files and
 * shared with this one (declared here instead of a common header).
 */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
                                 struct radeon_ib *ib,
                                 uint64_t pe,
                                 uint64_t addr, unsigned count,
                                 uint32_t incr, uint32_t flags);
/* Forward declarations for file-local helpers defined further down. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);
82
83 /* get temperature in millidegrees */
84 int ci_get_temp(struct radeon_device *rdev)
85 {
86         u32 temp;
87         int actual_temp = 0;
88
89         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
90                 CTF_TEMP_SHIFT;
91
92         if (temp & 0x200)
93                 actual_temp = 255;
94         else
95                 actual_temp = temp & 0x1ff;
96
97         actual_temp = actual_temp * 1000;
98
99         return actual_temp;
100 }
101
102 /* get temperature in millidegrees */
103 int kv_get_temp(struct radeon_device *rdev)
104 {
105         u32 temp;
106         int actual_temp = 0;
107
108         temp = RREG32_SMC(0xC0300E0C);
109
110         if (temp)
111                 actual_temp = (temp / 8) - 49;
112         else
113                 actual_temp = 0;
114
115         actual_temp = actual_temp * 1000;
116
117         return actual_temp;
118 }
119
/*
 * Indirect registers accessor
 */

/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to read
 *
 * Selects @reg through the PCIE_INDEX/PCIE_DATA pair and returns the
 * value read from PCIE_DATA.  The whole sequence is guarded by
 * pciep_idx_lock so concurrent accessors cannot interleave their
 * index and data accesses.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* readback — presumably flushes the posted index write before
	 * the data access; do not remove without confirming. */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
135
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset to write
 * @v: value to write
 *
 * Selects @reg through PCIE_INDEX and writes @v to PCIE_DATA under
 * pciep_idx_lock, mirroring cik_pciep_rreg().
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	/* readback — presumably flushes the posted index write */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	/* readback the data write as well so it is posted before the
	 * lock is dropped */
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
147
/*
 * RLC save/restore register list for Spectre parts.
 *
 * Entries mostly come in pairs: a packed selector word — high 16 bits
 * look like an instance/broadcast select, low 16 bits are the dword
 * offset of the register (byte offset >> 2) — followed by a data slot
 * initialised to 0.  The bare count words (0x3, 0x5) and the unpaired
 * trailing entries delimit list sections.  Layout inferred from the
 * table itself; the format is consumed by the RLC setup code, so do
 * not reorder or reformat — confirm against that code before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
594
/*
 * RLC save/restore register list for Kalindi parts.
 *
 * Same encoding as spectre_rlc_save_restore_register_list: selector
 * word (high 16 bits appear to be an instance/broadcast select, low
 * 16 bits the dword register offset) paired with a zero data slot;
 * bare count words (0x3, 0x5) delimit sections.  Consumed by the RLC
 * setup code — do not reorder.  (Layout inferred; confirm against
 * that code.)
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
919
/* Bonaire SPM golden settings: {offset, mask, value} triplets — presumably
 * applied as reg = (reg & ~mask) | value by radeon_program_register_sequence();
 * confirm against that helper.  Consumed by cik_init_golden_registers(). */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
924
/* Bonaire common golden settings: {offset, mask, value} triplets applied via
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
932
/* Bonaire golden register settings: {offset, mask, value} triplets applied via
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
977
/* Bonaire medium-grain / coarse-grain clock-gating init: {offset, mask, value}
 * triplets applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1063
/* Spectre (Kaveri) SPM golden settings: {offset, mask, value} triplets applied
 * via radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1068
/* Spectre (Kaveri) common golden settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1076
1077 static const u32 spectre_golden_registers[] =
1078 {
1079         0x3c000, 0xffff1fff, 0x96940200,
1080         0x3c00c, 0xffff0001, 0xff000000,
1081         0x3c200, 0xfffc0fff, 0x00000100,
1082         0x6ed8, 0x00010101, 0x00010000,
1083         0x9834, 0xf00fffff, 0x00000400,
1084         0x9838, 0xfffffffc, 0x00020200,
1085         0x5bb0, 0x000000f0, 0x00000070,
1086         0x5bc0, 0xf0311fff, 0x80300000,
1087         0x98f8, 0x73773777, 0x12010001,
1088         0x9b7c, 0x00ff0000, 0x00fc0000,
1089         0x2f48, 0x73773777, 0x12010001,
1090         0x8a14, 0xf000003f, 0x00000007,
1091         0x8b24, 0xffffffff, 0x00ffffff,
1092         0x28350, 0x3f3f3fff, 0x00000082,
1093         0x28355, 0x0000003f, 0x00000000,
1094         0x3e78, 0x00000001, 0x00000002,
1095         0x913c, 0xffff03df, 0x00000004,
1096         0xc768, 0x00000008, 0x00000008,
1097         0x8c00, 0x000008ff, 0x00000800,
1098         0x9508, 0x00010000, 0x00010000,
1099         0xac0c, 0xffffffff, 0x54763210,
1100         0x214f8, 0x01ff01ff, 0x00000002,
1101         0x21498, 0x007ff800, 0x00200000,
1102         0x2015c, 0xffffffff, 0x00000f40,
1103         0x30934, 0xffffffff, 0x00000001
1104 };
1105
/* Spectre (Kaveri) medium-grain / coarse-grain clock-gating init:
 * {offset, mask, value} triplets applied via
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1196
/* Kalindi (Kabini) SPM golden settings: {offset, mask, value} triplets applied
 * via radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1201
/* Kalindi (Kabini) common golden settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1209
/* Kalindi (Kabini) golden register settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1243
/* Kalindi (Kabini) medium-grain / coarse-grain clock-gating init:
 * {offset, mask, value} triplets applied via
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1302
1303 static void cik_init_golden_registers(struct radeon_device *rdev)
1304 {
1305         switch (rdev->family) {
1306         case CHIP_BONAIRE:
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_mgcg_cgcg_init,
1309                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1310                 radeon_program_register_sequence(rdev,
1311                                                  bonaire_golden_registers,
1312                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1313                 radeon_program_register_sequence(rdev,
1314                                                  bonaire_golden_common_registers,
1315                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1316                 radeon_program_register_sequence(rdev,
1317                                                  bonaire_golden_spm_registers,
1318                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1319                 break;
1320         case CHIP_KABINI:
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_mgcg_cgcg_init,
1323                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1324                 radeon_program_register_sequence(rdev,
1325                                                  kalindi_golden_registers,
1326                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1327                 radeon_program_register_sequence(rdev,
1328                                                  kalindi_golden_common_registers,
1329                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1330                 radeon_program_register_sequence(rdev,
1331                                                  kalindi_golden_spm_registers,
1332                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1333                 break;
1334         case CHIP_KAVERI:
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_mgcg_cgcg_init,
1337                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1338                 radeon_program_register_sequence(rdev,
1339                                                  spectre_golden_registers,
1340                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1341                 radeon_program_register_sequence(rdev,
1342                                                  spectre_golden_common_registers,
1343                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1344                 radeon_program_register_sequence(rdev,
1345                                                  spectre_golden_spm_registers,
1346                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1347                 break;
1348         default:
1349                 break;
1350         }
1351 }
1352
1353 /**
1354  * cik_get_xclk - get the xclk
1355  *
1356  * @rdev: radeon_device pointer
1357  *
1358  * Returns the reference clock used by the gfx engine
1359  * (CIK).
1360  */
1361 u32 cik_get_xclk(struct radeon_device *rdev)
1362 {
1363         u32 reference_clock = rdev->clock.spll.reference_freq;
1364
1365         if (rdev->flags & RADEON_IS_IGP) {
1366                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1367                         return reference_clock / 2;
1368         } else {
1369                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1370                         return reference_clock / 4;
1371         }
1372         return reference_clock;
1373 }
1374
1375 /**
1376  * cik_mm_rdoorbell - read a doorbell dword
1377  *
1378  * @rdev: radeon_device pointer
1379  * @offset: byte offset into the aperture
1380  *
1381  * Returns the value in the doorbell aperture at the
1382  * requested offset (CIK).
1383  */
1384 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1385 {
1386         if (offset < rdev->doorbell.size) {
1387                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1388         } else {
1389                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1390                 return 0;
1391         }
1392 }
1393
1394 /**
1395  * cik_mm_wdoorbell - write a doorbell dword
1396  *
1397  * @rdev: radeon_device pointer
1398  * @offset: byte offset into the aperture
1399  * @v: value to write
1400  *
1401  * Writes @v to the doorbell aperture at the
1402  * requested offset (CIK).
1403  */
1404 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1405 {
1406         if (offset < rdev->doorbell.size) {
1407                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1408         } else {
1409                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1410         }
1411 }
1412
#define BONAIRE_IO_MC_REGS_SIZE 36

/* {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs programmed into the MC
 * sequencer by ci_mc_load_microcode() before the MC ucode is uploaded. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1454
1455 /**
1456  * cik_srbm_select - select specific register instances
1457  *
1458  * @rdev: radeon_device pointer
1459  * @me: selected ME (micro engine)
1460  * @pipe: pipe
1461  * @queue: queue
1462  * @vmid: VMID
1463  *
1464  * Switches the currently active registers instances.  Some
1465  * registers are instanced per VMID, others are instanced per
1466  * me/pipe/queue combination.
1467  */
1468 static void cik_srbm_select(struct radeon_device *rdev,
1469                             u32 me, u32 pipe, u32 queue, u32 vmid)
1470 {
1471         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1472                              MEID(me & 0x3) |
1473                              VMID(vmid & 0xf) |
1474                              QUEUEID(queue & 0x7));
1475         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1476 }
1477
1478 /* ucode loading */
1479 /**
1480  * ci_mc_load_microcode - load MC ucode into the hw
1481  *
1482  * @rdev: radeon_device pointer
1483  *
1484  * Load the GDDR MC ucode into the hw (CIK).
1485  * Returns 0 on success, error on failure.
1486  */
1487 static int ci_mc_load_microcode(struct radeon_device *rdev)
1488 {
1489         const __be32 *fw_data;
1490         u32 running, blackout = 0;
1491         u32 *io_mc_regs;
1492         int i, ucode_size, regs_size;
1493
1494         if (!rdev->mc_fw)
1495                 return -EINVAL;
1496
1497         switch (rdev->family) {
1498         case CHIP_BONAIRE:
1499         default:
1500                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1501                 ucode_size = CIK_MC_UCODE_SIZE;
1502                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1503                 break;
1504         }
1505
1506         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1507
1508         if (running == 0) {
1509                 if (running) {
1510                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1511                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1512                 }
1513
1514                 /* reset the engine and set to writable */
1515                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1516                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1517
1518                 /* load mc io regs */
1519                 for (i = 0; i < regs_size; i++) {
1520                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1521                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1522                 }
1523                 /* load the MC ucode */
1524                 fw_data = (const __be32 *)rdev->mc_fw->data;
1525                 for (i = 0; i < ucode_size; i++)
1526                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1527
1528                 /* put the engine back into the active state */
1529                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1530                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1532
1533                 /* wait for training to complete */
1534                 for (i = 0; i < rdev->usec_timeout; i++) {
1535                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1536                                 break;
1537                         udelay(1);
1538                 }
1539                 for (i = 0; i < rdev->usec_timeout; i++) {
1540                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1541                                 break;
1542                         udelay(1);
1543                 }
1544
1545                 if (running)
1546                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1547         }
1548
1549         return 0;
1550 }
1551
1552 /**
1553  * cik_init_microcode - load ucode images from disk
1554  *
1555  * @rdev: radeon_device pointer
1556  *
1557  * Use the firmware interface to load the ucode images into
1558  * the driver (not loaded into hw).
1559  * Returns 0 on success, error on failure.
1560  */
1561 static int cik_init_microcode(struct radeon_device *rdev)
1562 {
1563         const char *chip_name;
1564         size_t pfp_req_size, me_req_size, ce_req_size,
1565                 mec_req_size, rlc_req_size, mc_req_size,
1566                 sdma_req_size, smc_req_size;
1567         char fw_name[30];
1568         int err;
1569
1570         DRM_DEBUG("\n");
1571
1572         switch (rdev->family) {
1573         case CHIP_BONAIRE:
1574                 chip_name = "BONAIRE";
1575                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1576                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1577                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1578                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1579                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1580                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1581                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1582                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1583                 break;
1584         case CHIP_KAVERI:
1585                 chip_name = "KAVERI";
1586                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1591                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592                 break;
1593         case CHIP_KABINI:
1594                 chip_name = "KABINI";
1595                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1596                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1597                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1598                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1599                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1600                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1601                 break;
1602         default: BUG();
1603         }
1604
1605         DRM_INFO("Loading %s Microcode\n", chip_name);
1606
1607         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1608         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1609         if (err)
1610                 goto out;
1611         if (rdev->pfp_fw->size != pfp_req_size) {
1612                 printk(KERN_ERR
1613                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1614                        rdev->pfp_fw->size, fw_name);
1615                 err = -EINVAL;
1616                 goto out;
1617         }
1618
1619         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1620         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1621         if (err)
1622                 goto out;
1623         if (rdev->me_fw->size != me_req_size) {
1624                 printk(KERN_ERR
1625                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1626                        rdev->me_fw->size, fw_name);
1627                 err = -EINVAL;
1628         }
1629
1630         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1631         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1632         if (err)
1633                 goto out;
1634         if (rdev->ce_fw->size != ce_req_size) {
1635                 printk(KERN_ERR
1636                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1637                        rdev->ce_fw->size, fw_name);
1638                 err = -EINVAL;
1639         }
1640
1641         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1642         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1643         if (err)
1644                 goto out;
1645         if (rdev->mec_fw->size != mec_req_size) {
1646                 printk(KERN_ERR
1647                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1648                        rdev->mec_fw->size, fw_name);
1649                 err = -EINVAL;
1650         }
1651
1652         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1653         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1654         if (err)
1655                 goto out;
1656         if (rdev->rlc_fw->size != rlc_req_size) {
1657                 printk(KERN_ERR
1658                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1659                        rdev->rlc_fw->size, fw_name);
1660                 err = -EINVAL;
1661         }
1662
1663         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1664         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1665         if (err)
1666                 goto out;
1667         if (rdev->sdma_fw->size != sdma_req_size) {
1668                 printk(KERN_ERR
1669                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1670                        rdev->sdma_fw->size, fw_name);
1671                 err = -EINVAL;
1672         }
1673
1674         /* No SMC, MC ucode on APUs */
1675         if (!(rdev->flags & RADEON_IS_IGP)) {
1676                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1677                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1678                 if (err)
1679                         goto out;
1680                 if (rdev->mc_fw->size != mc_req_size) {
1681                         printk(KERN_ERR
1682                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1683                                rdev->mc_fw->size, fw_name);
1684                         err = -EINVAL;
1685                 }
1686
1687                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1688                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1689                 if (err) {
1690                         printk(KERN_ERR
1691                                "smc: error loading firmware \"%s\"\n",
1692                                fw_name);
1693                         release_firmware(rdev->smc_fw);
1694                         rdev->smc_fw = NULL;
1695                 } else if (rdev->smc_fw->size != smc_req_size) {
1696                         printk(KERN_ERR
1697                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1698                                rdev->smc_fw->size, fw_name);
1699                         err = -EINVAL;
1700                 }
1701         }
1702
1703 out:
1704         if (err) {
1705                 if (err != -EINVAL)
1706                         printk(KERN_ERR
1707                                "cik_cp: Failed to load firmware \"%s\"\n",
1708                                fw_name);
1709                 release_firmware(rdev->pfp_fw);
1710                 rdev->pfp_fw = NULL;
1711                 release_firmware(rdev->me_fw);
1712                 rdev->me_fw = NULL;
1713                 release_firmware(rdev->ce_fw);
1714                 rdev->ce_fw = NULL;
1715                 release_firmware(rdev->rlc_fw);
1716                 rdev->rlc_fw = NULL;
1717                 release_firmware(rdev->mc_fw);
1718                 rdev->mc_fw = NULL;
1719                 release_firmware(rdev->smc_fw);
1720                 rdev->smc_fw = NULL;
1721         }
1722         return err;
1723 }
1724
1725 /*
1726  * Core functions
1727  */
1728 /**
1729  * cik_tiling_mode_table_init - init the hw tiling table
1730  *
1731  * @rdev: radeon_device pointer
1732  *
1733  * Starting with SI, the tiling setup is done globally in a
1734  * set of 32 tiling modes.  Rather than selecting each set of
1735  * parameters per surface as on older ASICs, we just select
1736  * which index in the tiling table we want to use, and the
1737  * surface uses those parameters (CIK).
1738  */
1739 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1740 {
1741         const u32 num_tile_mode_states = 32;
1742         const u32 num_secondary_tile_mode_states = 16;
1743         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1744         u32 num_pipe_configs;
1745         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1746                 rdev->config.cik.max_shader_engines;
1747
1748         switch (rdev->config.cik.mem_row_size_in_kb) {
1749         case 1:
1750                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1751                 break;
1752         case 2:
1753         default:
1754                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1755                 break;
1756         case 4:
1757                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1758                 break;
1759         }
1760
1761         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1762         if (num_pipe_configs > 8)
1763                 num_pipe_configs = 8; /* ??? */
1764
1765         if (num_pipe_configs == 8) {
1766                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767                         switch (reg_offset) {
1768                         case 0:
1769                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1771                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1772                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1773                                 break;
1774                         case 1:
1775                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1776                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1777                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1778                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1779                                 break;
1780                         case 2:
1781                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1782                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1783                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1784                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1785                                 break;
1786                         case 3:
1787                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1788                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1789                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1790                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1791                                 break;
1792                         case 4:
1793                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1794                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1795                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1796                                                  TILE_SPLIT(split_equal_to_row_size));
1797                                 break;
1798                         case 5:
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1800                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1801                                 break;
1802                         case 6:
1803                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1804                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1805                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1806                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1807                                 break;
1808                         case 7:
1809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1810                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1811                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1812                                                  TILE_SPLIT(split_equal_to_row_size));
1813                                 break;
1814                         case 8:
1815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1816                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1817                                 break;
1818                         case 9:
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1821                                 break;
1822                         case 10:
1823                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1824                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1825                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1826                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1827                                 break;
1828                         case 11:
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1833                                 break;
1834                         case 12:
1835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1837                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1838                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1839                                 break;
1840                         case 13:
1841                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1842                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1843                                 break;
1844                         case 14:
1845                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1847                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1848                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1849                                 break;
1850                         case 16:
1851                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1852                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1853                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1854                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1855                                 break;
1856                         case 17:
1857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1858                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1859                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1860                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1861                                 break;
1862                         case 27:
1863                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1865                                 break;
1866                         case 28:
1867                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1869                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1870                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871                                 break;
1872                         case 29:
1873                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1875                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1876                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1877                                 break;
1878                         case 30:
1879                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1880                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1881                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1882                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883                                 break;
1884                         default:
1885                                 gb_tile_moden = 0;
1886                                 break;
1887                         }
1888                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1889                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1890                 }
1891                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1892                         switch (reg_offset) {
1893                         case 0:
1894                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1896                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1897                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1898                                 break;
1899                         case 1:
1900                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1903                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1904                                 break;
1905                         case 2:
1906                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1908                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1909                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1910                                 break;
1911                         case 3:
1912                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1914                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1915                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1916                                 break;
1917                         case 4:
1918                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1921                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1922                                 break;
1923                         case 5:
1924                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1926                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1927                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1928                                 break;
1929                         case 6:
1930                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1932                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1933                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1934                                 break;
1935                         case 8:
1936                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1938                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1939                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1940                                 break;
1941                         case 9:
1942                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1943                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1944                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1945                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1946                                 break;
1947                         case 10:
1948                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1949                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1950                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1951                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1952                                 break;
1953                         case 11:
1954                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1955                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1956                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1957                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1958                                 break;
1959                         case 12:
1960                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1962                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1963                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1964                                 break;
1965                         case 13:
1966                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1967                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1968                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1969                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1970                                 break;
1971                         case 14:
1972                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1976                                 break;
1977                         default:
1978                                 gb_tile_moden = 0;
1979                                 break;
1980                         }
1981                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1982                 }
1983         } else if (num_pipe_configs == 4) {
1984                 if (num_rbs == 4) {
1985                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1986                                 switch (reg_offset) {
1987                                 case 0:
1988                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1990                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1991                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1992                                         break;
1993                                 case 1:
1994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1996                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1997                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1998                                         break;
1999                                 case 2:
2000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2003                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2004                                         break;
2005                                 case 3:
2006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2010                                         break;
2011                                 case 4:
2012                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2015                                                          TILE_SPLIT(split_equal_to_row_size));
2016                                         break;
2017                                 case 5:
2018                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2019                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2020                                         break;
2021                                 case 6:
2022                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2023                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2024                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2025                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2026                                         break;
2027                                 case 7:
2028                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2029                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2030                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2031                                                          TILE_SPLIT(split_equal_to_row_size));
2032                                         break;
2033                                 case 8:
2034                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2035                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2036                                         break;
2037                                 case 9:
2038                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2040                                         break;
2041                                 case 10:
2042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2044                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2046                                         break;
2047                                 case 11:
2048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2049                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2051                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2052                                         break;
2053                                 case 12:
2054                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2055                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2057                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                         break;
2059                                 case 13:
2060                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2062                                         break;
2063                                 case 14:
2064                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2068                                         break;
2069                                 case 16:
2070                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2071                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2072                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                                         break;
2075                                 case 17:
2076                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2077                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2079                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                         break;
2081                                 case 27:
2082                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2084                                         break;
2085                                 case 28:
2086                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2087                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2089                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                                         break;
2091                                 case 29:
2092                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2093                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2094                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2095                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096                                         break;
2097                                 case 30:
2098                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2099                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2101                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                         break;
2103                                 default:
2104                                         gb_tile_moden = 0;
2105                                         break;
2106                                 }
2107                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2108                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2109                         }
2110                 } else if (num_rbs < 4) {
2111                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2112                                 switch (reg_offset) {
2113                                 case 0:
2114                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2116                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2117                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2118                                         break;
2119                                 case 1:
2120                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2122                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2124                                         break;
2125                                 case 2:
2126                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2128                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2129                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2130                                         break;
2131                                 case 3:
2132                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2134                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2136                                         break;
2137                                 case 4:
2138                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2140                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141                                                          TILE_SPLIT(split_equal_to_row_size));
2142                                         break;
2143                                 case 5:
2144                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146                                         break;
2147                                 case 6:
2148                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2149                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2150                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2151                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2152                                         break;
2153                                 case 7:
2154                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2155                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2156                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2157                                                          TILE_SPLIT(split_equal_to_row_size));
2158                                         break;
2159                                 case 8:
2160                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2161                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2162                                         break;
2163                                 case 9:
2164                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2166                                         break;
2167                                 case 10:
2168                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2170                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2171                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172                                         break;
2173                                 case 11:
2174                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2177                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178                                         break;
2179                                 case 12:
2180                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2181                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                                         break;
2185                                 case 13:
2186                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2188                                         break;
2189                                 case 14:
2190                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2193                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                                         break;
2195                                 case 16:
2196                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2199                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                                         break;
2201                                 case 17:
2202                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2203                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2205                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                                         break;
2207                                 case 27:
2208                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2210                                         break;
2211                                 case 28:
2212                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2213                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2214                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2215                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216                                         break;
2217                                 case 29:
2218                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2221                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222                                         break;
2223                                 case 30:
2224                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2225                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2226                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2227                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228                                         break;
2229                                 default:
2230                                         gb_tile_moden = 0;
2231                                         break;
2232                                 }
2233                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2234                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2235                         }
2236                 }
2237                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2238                         switch (reg_offset) {
2239                         case 0:
2240                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2242                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2243                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2244                                 break;
2245                         case 1:
2246                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2248                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2250                                 break;
2251                         case 2:
2252                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2253                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2254                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2256                                 break;
2257                         case 3:
2258                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2262                                 break;
2263                         case 4:
2264                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2268                                 break;
2269                         case 5:
2270                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2274                                 break;
2275                         case 6:
2276                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2279                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2280                                 break;
2281                         case 8:
2282                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2283                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2284                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2286                                 break;
2287                         case 9:
2288                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2292                                 break;
2293                         case 10:
2294                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2298                                 break;
2299                         case 11:
2300                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2304                                 break;
2305                         case 12:
2306                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2309                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2310                                 break;
2311                         case 13:
2312                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2313                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2314                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2315                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2316                                 break;
2317                         case 14:
2318                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2321                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2322                                 break;
2323                         default:
2324                                 gb_tile_moden = 0;
2325                                 break;
2326                         }
2327                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2328                 }
2329         } else if (num_pipe_configs == 2) {
2330                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2331                         switch (reg_offset) {
2332                         case 0:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2337                                 break;
2338                         case 1:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2343                                 break;
2344                         case 2:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2347                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2348                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2349                                 break;
2350                         case 3:
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2354                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2355                                 break;
2356                         case 4:
2357                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2360                                                  TILE_SPLIT(split_equal_to_row_size));
2361                                 break;
2362                         case 5:
2363                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2364                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365                                 break;
2366                         case 6:
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2370                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2371                                 break;
2372                         case 7:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2376                                                  TILE_SPLIT(split_equal_to_row_size));
2377                                 break;
2378                         case 8:
2379                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2380                                 break;
2381                         case 9:
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2384                                 break;
2385                         case 10:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2389                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390                                 break;
2391                         case 11:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2395                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                                 break;
2397                         case 12:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2401                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                                 break;
2403                         case 13:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2406                                 break;
2407                         case 14:
2408                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2411                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412                                 break;
2413                         case 16:
2414                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2417                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                                 break;
2419                         case 17:
2420                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2421                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2423                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                                 break;
2425                         case 27:
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2428                                 break;
2429                         case 28:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 29:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 30:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         default:
2448                                 gb_tile_moden = 0;
2449                                 break;
2450                         }
2451                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2452                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2453                 }
2454                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2455                         switch (reg_offset) {
2456                         case 0:
2457                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2460                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2461                                 break;
2462                         case 1:
2463                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2467                                 break;
2468                         case 2:
2469                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2473                                 break;
2474                         case 3:
2475                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2479                                 break;
2480                         case 4:
2481                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2485                                 break;
2486                         case 5:
2487                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2491                                 break;
2492                         case 6:
2493                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2497                                 break;
2498                         case 8:
2499                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2500                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2501                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2502                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2503                                 break;
2504                         case 9:
2505                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2506                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2509                                 break;
2510                         case 10:
2511                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2515                                 break;
2516                         case 11:
2517                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2518                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2520                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2521                                 break;
2522                         case 12:
2523                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2525                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2527                                 break;
2528                         case 13:
2529                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2532                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2533                                 break;
2534                         case 14:
2535                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2538                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2539                                 break;
2540                         default:
2541                                 gb_tile_moden = 0;
2542                                 break;
2543                         }
2544                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2545                 }
2546         } else
2547                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2548 }
2549
2550 /**
2551  * cik_select_se_sh - select which SE, SH to address
2552  *
2553  * @rdev: radeon_device pointer
2554  * @se_num: shader engine to address
2555  * @sh_num: sh block to address
2556  *
2557  * Select which SE, SH combinations to address. Certain
2558  * registers are instanced per SE or SH.  0xffffffff means
2559  * broadcast to all SEs or SHs (CIK).
2560  */
2561 static void cik_select_se_sh(struct radeon_device *rdev,
2562                              u32 se_num, u32 sh_num)
2563 {
2564         u32 data = INSTANCE_BROADCAST_WRITES;
2565
2566         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2567                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2568         else if (se_num == 0xffffffff)
2569                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2570         else if (sh_num == 0xffffffff)
2571                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2572         else
2573                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2574         WREG32(GRBM_GFX_INDEX, data);
2575 }
2576
2577 /**
2578  * cik_create_bitmask - create a bitmask
2579  *
2580  * @bit_width: length of the mask
2581  *
2582  * create a variable length bit mask (CIK).
2583  * Returns the bitmask.
2584  */
2585 static u32 cik_create_bitmask(u32 bit_width)
2586 {
2587         u32 i, mask = 0;
2588
2589         for (i = 0; i < bit_width; i++) {
2590                 mask <<= 1;
2591                 mask |= 1;
2592         }
2593         return mask;
2594 }
2595
2596 /**
2597  * cik_select_se_sh - select which SE, SH to address
2598  *
2599  * @rdev: radeon_device pointer
2600  * @max_rb_num: max RBs (render backends) for the asic
2601  * @se_num: number of SEs (shader engines) for the asic
2602  * @sh_per_se: number of SH blocks per SE for the asic
2603  *
2604  * Calculates the bitmask of disabled RBs (CIK).
2605  * Returns the disabled RB bitmask.
2606  */
2607 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2608                               u32 max_rb_num, u32 se_num,
2609                               u32 sh_per_se)
2610 {
2611         u32 data, mask;
2612
2613         data = RREG32(CC_RB_BACKEND_DISABLE);
2614         if (data & 1)
2615                 data &= BACKEND_DISABLE_MASK;
2616         else
2617                 data = 0;
2618         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2619
2620         data >>= BACKEND_DISABLE_SHIFT;
2621
2622         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2623
2624         return data & mask;
2625 }
2626
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the harvest state of every SE/SH pair into one bitmap,
	 * packed CIK_RB_BITMAP_WIDTH_PER_SH bits per SH
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert the disabled bitmap into an enabled one.
	 * NOTE(review): only the low max_rb_num bits are scanned although
	 * disabled_rbs is packed per-SH above — confirm against the later
	 * upstream render-backend-setup rework.
	 */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program a raster config mapping per SE from each SH's two
	 * RB-enable bits
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only RB 0 of this pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only RB 1 of this pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (or fallback) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* back to broadcast for subsequent register writes */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2684
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/pipe/fifo limits */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* CU/backend counts vary by Kaveri SKU (PCI device id) */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 look like per-instance HDP tiling
	 * registers cleared in a stride of 0x18 — confirm against the
	 * register spec; the same pattern appears in other radeon asics.
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not otherwise used in this function */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every client that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* NOTE(review): the max_rb_num argument receives a per-SE backend
	 * count — see the notes in cik_setup_rb/cik_get_rb_disabled.
	 */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* size the scan converter fifos from the per-ASIC limits above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write back unchanged; presumably latches the value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the configuration settle before the CP is started */
	udelay(50);
}
2933
2934 /*
2935  * GPU scratch registers helpers function.
2936  */
2937 /**
2938  * cik_scratch_init - setup driver info for CP scratch regs
2939  *
2940  * @rdev: radeon_device pointer
2941  *
2942  * Set up the number and offset of the CP scratch registers.
2943  * NOTE: use of CP scratch registers is a legacy inferface and
2944  * is not used by default on newer asics (r6xx+).  On newer asics,
2945  * memory buffers are used for fences rather than scratch regs.
2946  */
2947 static void cik_scratch_init(struct radeon_device *rdev)
2948 {
2949         int i;
2950
2951         rdev->scratch.num_reg = 7;
2952         rdev->scratch.reg_base = SCRATCH_REG0;
2953         for (i = 0; i < rdev->scratch.num_reg; i++) {
2954                 rdev->scratch.free[i] = true;
2955                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2956         }
2957 }
2958
2959 /**
2960  * cik_ring_test - basic gfx ring test
2961  *
2962  * @rdev: radeon_device pointer
2963  * @ring: radeon_ring structure holding ring information
2964  *
2965  * Allocate a scratch register and write to it using the gfx ring (CIK).
2966  * Provides a basic gfx ring test to verify that the ring is working.
2967  * Used by cik_cp_gfx_resume();
2968  * Returns 0 on success, error on failure.
2969  */
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2971 {
2972         uint32_t scratch;
2973         uint32_t tmp = 0;
2974         unsigned i;
2975         int r;
2976
2977         r = radeon_scratch_get(rdev, &scratch);
2978         if (r) {
2979                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2980                 return r;
2981         }
2982         WREG32(scratch, 0xCAFEDEAD);
2983         r = radeon_ring_lock(rdev, ring, 3);
2984         if (r) {
2985                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986                 radeon_scratch_free(rdev, scratch);
2987                 return r;
2988         }
2989         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991         radeon_ring_write(ring, 0xDEADBEEF);
2992         radeon_ring_unlock_commit(rdev, ring);
2993
2994         for (i = 0; i < rdev->usec_timeout; i++) {
2995                 tmp = RREG32(scratch);
2996                 if (tmp == 0xDEADBEEF)
2997                         break;
2998                 DRM_UDELAY(1);
2999         }
3000         if (i < rdev->usec_timeout) {
3001                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3002         } else {
3003                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004                           ring->idx, scratch, tmp);
3005                 r = -EINVAL;
3006         }
3007         radeon_scratch_free(rdev, scratch);
3008         return r;
3009 }
3010
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3047
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc); /* fence addr, dword aligned */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3085
3086 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3087                              struct radeon_ring *ring,
3088                              struct radeon_semaphore *semaphore,
3089                              bool emit_wait)
3090 {
3091         uint64_t addr = semaphore->gpu_addr;
3092         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3093
3094         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3095         radeon_ring_write(ring, addr & 0xffffffff);
3096         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3097 }
3098
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be after this IB, either in the
		 * save register or the writeback buffer; the offsets (3+4 / 5+4)
		 * account for the packets emitted below plus the IB packet itself
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length plus the VM id this IB executes under (0 = no VM) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3157
3158 /**
3159  * cik_ib_test - basic gfx ring IB test
3160  *
3161  * @rdev: radeon_device pointer
3162  * @ring: radeon_ring structure holding ring information
3163  *
3164  * Allocate an IB and execute it on the gfx ring (CIK).
3165  * Provides a basic gfx ring test to verify that IBs are working.
3166  * Returns 0 on success, error on failure.
3167  */
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3169 {
3170         struct radeon_ib ib;
3171         uint32_t scratch;
3172         uint32_t tmp = 0;
3173         unsigned i;
3174         int r;
3175
3176         r = radeon_scratch_get(rdev, &scratch);
3177         if (r) {
3178                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3179                 return r;
3180         }
3181         WREG32(scratch, 0xCAFEDEAD);
3182         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3183         if (r) {
3184                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3185                 return r;
3186         }
3187         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189         ib.ptr[2] = 0xDEADBEEF;
3190         ib.length_dw = 3;
3191         r = radeon_ib_schedule(rdev, &ib, NULL);
3192         if (r) {
3193                 radeon_scratch_free(rdev, scratch);
3194                 radeon_ib_free(rdev, &ib);
3195                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3196                 return r;
3197         }
3198         r = radeon_fence_wait(ib.fence, false);
3199         if (r) {
3200                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3201                 return r;
3202         }
3203         for (i = 0; i < rdev->usec_timeout; i++) {
3204                 tmp = RREG32(scratch);
3205                 if (tmp == 0xDEADBEEF)
3206                         break;
3207                 DRM_UDELAY(1);
3208         }
3209         if (i < rdev->usec_timeout) {
3210                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3211         } else {
3212                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3213                           scratch, tmp);
3214                 r = -EINVAL;
3215         }
3216         radeon_scratch_free(rdev, scratch);
3217         radeon_ib_free(rdev, &ib);
3218         return r;
3219 }
3220
3221 /*
3222  * CP.
 * On CIK, gfx and compute now have independent command processors.
3224  *
3225  * GFX
3226  * Gfx consists of a single ring and can process both gfx jobs and
3227  * compute jobs.  The gfx CP consists of three microengines (ME):
3228  * PFP - Pre-Fetch Parser
3229  * ME - Micro Engine
3230  * CE - Constant Engine
3231  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3233  * used by the DE so that they can be loaded into cache in parallel
3234  * while the DE is processing state update packets.
3235  *
3236  * Compute
3237  * The compute CP consists of two microengines (ME):
3238  * MEC1 - Compute MicroEngine 1
3239  * MEC2 - Compute MicroEngine 2
3240  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241  * The queues are exposed to userspace and are programmed directly
3242  * by the compute runtime.
3243  */
3244 /**
3245  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3246  *
3247  * @rdev: radeon_device pointer
3248  * @enable: enable or disable the MEs
3249  *
3250  * Halts or unhalts the gfx MEs.
3251  */
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3253 {
3254         if (enable)
3255                 WREG32(CP_ME_CNTL, 0);
3256         else {
3257                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3259         }
3260         udelay(50);
3261 }
3262
3263 /**
3264  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3265  *
3266  * @rdev: radeon_device pointer
3267  *
3268  * Loads the gfx PFP, ME, and CE ucode.
3269  * Returns 0 for success, -EINVAL if the ucode is not available.
3270  */
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3272 {
3273         const __be32 *fw_data;
3274         int i;
3275
3276         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3277                 return -EINVAL;
3278
3279         cik_cp_gfx_enable(rdev, false);
3280
3281         /* PFP */
3282         fw_data = (const __be32 *)rdev->pfp_fw->data;
3283         WREG32(CP_PFP_UCODE_ADDR, 0);
3284         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286         WREG32(CP_PFP_UCODE_ADDR, 0);
3287
3288         /* CE */
3289         fw_data = (const __be32 *)rdev->ce_fw->data;
3290         WREG32(CP_CE_UCODE_ADDR, 0);
3291         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293         WREG32(CP_CE_UCODE_ADDR, 0);
3294
3295         /* ME */
3296         fw_data = (const __be32 *)rdev->me_fw->data;
3297         WREG32(CP_ME_RAM_WADDR, 0);
3298         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300         WREG32(CP_ME_RAM_WADDR, 0);
3301
3302         WREG32(CP_PFP_UCODE_ADDR, 0);
3303         WREG32(CP_CE_UCODE_ADDR, 0);
3304         WREG32(CP_ME_RAM_WADDR, 0);
3305         WREG32(CP_ME_RAM_RADDR, 0);
3306         return 0;
3307 }
3308
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 dwords of packets below plus the default state */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3369
3370 /**
3371  * cik_cp_gfx_fini - stop the gfx ring
3372  *
3373  * @rdev: radeon_device pointer
3374  *
3375  * Stop the gfx ring and tear down the driver ring
3376  * info.
3377  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx CP before tearing down the ring it may be fetching from */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3383
3384 /**
3385  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3386  *
3387  * @rdev: radeon_device pointer
3388  *
3389  * Program the location and size of the gfx ring buffer
3390  * and test it to make sure it's working.
3391  * Returns 0 for success, error for failure.
3392  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size; low bits encode log2(size/8), bits 8+ the
	 * RPTR report block size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; RB_RPTR_WR_ENA
	 * momentarily allows the rptr to be reset along with the wptr */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* brief settle time before re-arming the RB control register */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3456
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458                               struct radeon_ring *ring)
3459 {
3460         u32 rptr;
3461
3462
3463
3464         if (rdev->wb.enabled) {
3465                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3466         } else {
3467                 mutex_lock(&rdev->srbm_mutex);
3468                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3469                 rptr = RREG32(CP_HQD_PQ_RPTR);
3470                 cik_srbm_select(rdev, 0, 0, 0, 0);
3471                 mutex_unlock(&rdev->srbm_mutex);
3472         }
3473
3474         return rptr;
3475 }
3476
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478                               struct radeon_ring *ring)
3479 {
3480         u32 wptr;
3481
3482         if (rdev->wb.enabled) {
3483                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3484         } else {
3485                 mutex_lock(&rdev->srbm_mutex);
3486                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487                 wptr = RREG32(CP_HQD_PQ_WPTR);
3488                 cik_srbm_select(rdev, 0, 0, 0, 0);
3489                 mutex_unlock(&rdev->srbm_mutex);
3490         }
3491
3492         return wptr;
3493 }
3494
void cik_compute_ring_set_wptr(struct radeon_device *rdev,
                               struct radeon_ring *ring)
{
	/* publish the new wptr in the writeback page first, then ring the
	 * doorbell so the CP sees a consistent value when it wakes up */
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}
3501
3502 /**
3503  * cik_cp_compute_enable - enable/disable the compute CP MEs
3504  *
3505  * @rdev: radeon_device pointer
3506  * @enable: enable or disable the MEs
3507  *
3508  * Halts or unhalts the compute MEs.
3509  */
3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3511 {
3512         if (enable)
3513                 WREG32(CP_MEC_CNTL, 0);
3514         else
3515                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3516         udelay(50);
3517 }
3518
3519 /**
3520  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3521  *
3522  * @rdev: radeon_device pointer
3523  *
3524  * Loads the compute MEC1&2 ucode.
3525  * Returns 0 for success, -EINVAL if the ucode is not available.
3526  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* the MECs must be halted while their ucode is replaced */
	cik_cp_compute_enable(rdev, false);

	/* MEC1: reset the write address, stream the big-endian ucode words,
	 * then reset the address again */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 (KV only) - loaded from the same mec_fw blob;
		 * presumably MEC1 and MEC2 share one image here - verify
		 * against the firmware package if a separate MEC2 image
		 * is ever introduced */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3555
3556 /**
3557  * cik_cp_compute_start - start the compute queues
3558  *
3559  * @rdev: radeon_device pointer
3560  *
3561  * Enable the compute queues.
3562  * Returns 0 for success, error for failure.
3563  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* just unhalt the MECs; the individual queues are programmed in
	 * cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3570
3571 /**
3572  * cik_cp_compute_fini - stop the compute queues
3573  *
3574  * @rdev: radeon_device pointer
3575  *
3576  * Stop the compute queues and tear down the driver queue
3577  * info.
3578  */
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3580 {
3581         int i, idx, r;
3582
3583         cik_cp_compute_enable(rdev, false);
3584
3585         for (i = 0; i < 2; i++) {
3586                 if (i == 0)
3587                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3588                 else
3589                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3590
3591                 if (rdev->ring[idx].mqd_obj) {
3592                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593                         if (unlikely(r != 0))
3594                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3595
3596                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3598
3599                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600                         rdev->ring[idx].mqd_obj = NULL;
3601                 }
3602         }
3603 }
3604
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	/* unpin and free the shared HPD EOP buffer object, if allocated */
	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
3620
3621 #define MEC_HPD_SIZE 2048
3622
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one EOP buffer of 2*MEC_HPD_SIZE bytes per pipe, GTT-backed */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	/* reserve + pin + kmap; on any failure tear everything down again */
	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
3678
/* CPU-side shadow of the CP hardware queue descriptor (HQD) register set;
 * the values written to the CP_HQD_*/CP_MQD_* registers in
 * cik_cp_compute_resume() are mirrored here inside the MQD so the MEC can
 * restore the queue state. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3717
/* Memory queue descriptor (MQD) layout for Bonaire-class (CIK) compute
 * queues; lives in a GTT BO created in cik_cp_compute_resume() and is read
 * by the MEC firmware.  queue_state embeds the HQD register shadow. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3745
3746 /**
3747  * cik_cp_compute_resume - setup the compute queue registers
3748  *
3749  * @rdev: radeon_device pointer
3750  *
3751  * Program the compute queues and test them to make sure they
3752  * are working.
3753  * Returns 0 for success, error for failure.
3754  */
3755 static int cik_cp_compute_resume(struct radeon_device *rdev)
3756 {
3757         int r, i, idx;
3758         u32 tmp;
3759         bool use_doorbell = true;
3760         u64 hqd_gpu_addr;
3761         u64 mqd_gpu_addr;
3762         u64 eop_gpu_addr;
3763         u64 wb_gpu_addr;
3764         u32 *buf;
3765         struct bonaire_mqd *mqd;
3766
3767         r = cik_cp_compute_start(rdev);
3768         if (r)
3769                 return r;
3770
3771         /* fix up chicken bits */
3772         tmp = RREG32(CP_CPF_DEBUG);
3773         tmp |= (1 << 23);
3774         WREG32(CP_CPF_DEBUG, tmp);
3775
3776         /* init the pipes */
3777         mutex_lock(&rdev->srbm_mutex);
3778         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3779                 int me = (i < 4) ? 1 : 2;
3780                 int pipe = (i < 4) ? i : (i - 4);
3781
3782                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3783
3784                 cik_srbm_select(rdev, me, pipe, 0, 0);
3785
3786                 /* write the EOP addr */
3787                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3788                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3789
3790                 /* set the VMID assigned */
3791                 WREG32(CP_HPD_EOP_VMID, 0);
3792
3793                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3794                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3795                 tmp &= ~EOP_SIZE_MASK;
3796                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3797                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3798         }
3799         cik_srbm_select(rdev, 0, 0, 0, 0);
3800         mutex_unlock(&rdev->srbm_mutex);
3801
3802         /* init the queues.  Just two for now. */
3803         for (i = 0; i < 2; i++) {
3804                 if (i == 0)
3805                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3806                 else
3807                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3808
3809                 if (rdev->ring[idx].mqd_obj == NULL) {
3810                         r = radeon_bo_create(rdev,
3811                                              sizeof(struct bonaire_mqd),
3812                                              PAGE_SIZE, true,
3813                                              RADEON_GEM_DOMAIN_GTT, NULL,
3814                                              &rdev->ring[idx].mqd_obj);
3815                         if (r) {
3816                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3817                                 return r;
3818                         }
3819                 }
3820
3821                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3822                 if (unlikely(r != 0)) {
3823                         cik_cp_compute_fini(rdev);
3824                         return r;
3825                 }
3826                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3827                                   &mqd_gpu_addr);
3828                 if (r) {
3829                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3830                         cik_cp_compute_fini(rdev);
3831                         return r;
3832                 }
3833                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3834                 if (r) {
3835                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3836                         cik_cp_compute_fini(rdev);
3837                         return r;
3838                 }
3839
3840                 /* doorbell offset */
3841                 rdev->ring[idx].doorbell_offset =
3842                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3843
3844                 /* init the mqd struct */
3845                 memset(buf, 0, sizeof(struct bonaire_mqd));
3846
3847                 mqd = (struct bonaire_mqd *)buf;
3848                 mqd->header = 0xC0310800;
3849                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3850                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3851                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3852                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3853
3854                 mutex_lock(&rdev->srbm_mutex);
3855                 cik_srbm_select(rdev, rdev->ring[idx].me,
3856                                 rdev->ring[idx].pipe,
3857                                 rdev->ring[idx].queue, 0);
3858
3859                 /* disable wptr polling */
3860                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3861                 tmp &= ~WPTR_POLL_EN;
3862                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3863
3864                 /* enable doorbell? */
3865                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3866                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3867                 if (use_doorbell)
3868                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3869                 else
3870                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3873
3874                 /* disable the queue if it's active */
3875                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3876                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3877                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3878                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3879                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880                         for (i = 0; i < rdev->usec_timeout; i++) {
3881                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3882                                         break;
3883                                 udelay(1);
3884                         }
3885                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3888                 }
3889
3890                 /* set the pointer to the MQD */
3891                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895                 /* set MQD vmid to 0 */
3896                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3899
3900                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3901                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3906
3907                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3908                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909                 mqd->queue_state.cp_hqd_pq_control &=
3910                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3911
3912                 mqd->queue_state.cp_hqd_pq_control |=
3913                         order_base_2(rdev->ring[idx].ring_size / 8);
3914                 mqd->queue_state.cp_hqd_pq_control |=
3915                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3916 #ifdef __BIG_ENDIAN
3917                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3918 #endif
3919                 mqd->queue_state.cp_hqd_pq_control &=
3920                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921                 mqd->queue_state.cp_hqd_pq_control |=
3922                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3924
3925                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3926                 if (i == 0)
3927                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3928                 else
3929                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3935
3936                 /* set the wb address wether it's enabled or not */
3937                 if (i == 0)
3938                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3939                 else
3940                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943                         upper_32_bits(wb_gpu_addr) & 0xffff;
3944                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3945                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3946                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3947                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3948
3949                 /* enable the doorbell if requested */
3950                 if (use_doorbell) {
3951                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3952                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3953                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3954                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3955                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3956                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3957                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3958                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3959
3960                 } else {
3961                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3962                 }
3963                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3964                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3965
3966                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3967                 rdev->ring[idx].wptr = 0;
3968                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3969                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3970                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3971                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3972
3973                 /* set the vmid for the queue */
3974                 mqd->queue_state.cp_hqd_vmid = 0;
3975                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3976
3977                 /* activate the queue */
3978                 mqd->queue_state.cp_hqd_active = 1;
3979                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3980
3981                 cik_srbm_select(rdev, 0, 0, 0, 0);
3982                 mutex_unlock(&rdev->srbm_mutex);
3983
3984                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3985                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3986
3987                 rdev->ring[idx].ready = true;
3988                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3989                 if (r)
3990                         rdev->ring[idx].ready = false;
3991         }
3992
3993         return 0;
3994 }
3995
/* Enable/disable both the gfx CP and the compute MECs together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4001
/* Load gfx (PFP/ME/CE) ucode first, then the compute MEC ucode.
 * Returns 0 on success or the first error encountered. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4015
/* Tear down both the gfx ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4021
4022 static int cik_cp_resume(struct radeon_device *rdev)
4023 {
4024         int r;
4025
4026         cik_enable_gui_idle_interrupt(rdev, false);
4027
4028         r = cik_cp_load_microcode(rdev);
4029         if (r)
4030                 return r;
4031
4032         r = cik_cp_gfx_resume(rdev);
4033         if (r)
4034                 return r;
4035         r = cik_cp_compute_resume(rdev);
4036         if (r)
4037                 return r;
4038
4039         cik_enable_gui_idle_interrupt(rdev, true);
4040
4041         return 0;
4042 }
4043
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used by
 * the soft-reset path to aid hang diagnosis. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4083
4084 /**
4085  * cik_gpu_check_soft_reset - check which blocks are busy
4086  *
4087  * @rdev: radeon_device pointer
4088  *
4089  * Check which blocks are busy and return the relevant reset
4090  * mask to be used by cik_gpu_soft_reset().
4091  * Returns a mask of the blocks to be reset.
4092  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS - gfx pipeline busy bits */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG - note SDMA reports *idle*, so the test is inverted */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS - system-block busy bits */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4164
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (RADEON_RESET_*)
 *
 * Soft reset the blocks specified in @reset_mask.
 * The sequence is: dump status, halt the RLC/CP/MEC/SDMA engines,
 * quiesce the memory controller, pulse the GRBM/SRBM soft-reset
 * bits, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump current state to the log before we touch anything */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before asserting any resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset is skipped on IGPs (carve-out memory) */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		/* de-assert the reset bits we set */
		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
4291
4292 /**
4293  * cik_asic_reset - soft reset GPU
4294  *
4295  * @rdev: radeon_device pointer
4296  *
4297  * Look up which blocks are hung and attempt
4298  * to reset them.
4299  * Returns 0 for success.
4300  */
4301 int cik_asic_reset(struct radeon_device *rdev)
4302 {
4303         u32 reset_mask;
4304
4305         reset_mask = cik_gpu_check_soft_reset(rdev);
4306
4307         if (reset_mask)
4308                 r600_set_bios_scratch_engine_hung(rdev, true);
4309
4310         cik_gpu_soft_reset(rdev, reset_mask);
4311
4312         reset_mask = cik_gpu_check_soft_reset(rdev);
4313
4314         if (!reset_mask)
4315                 r600_set_bios_scratch_engine_hung(rdev, false);
4316
4317         return 0;
4318 }
4319
4320 /**
4321  * cik_gfx_is_lockup - check if the 3D engine is locked up
4322  *
4323  * @rdev: radeon_device pointer
4324  * @ring: radeon_ring structure holding ring information
4325  *
4326  * Check if the 3D engine is locked up (CIK).
4327  * Returns true if the engine is locked, false if not.
4328  */
4329 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4330 {
4331         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4332
4333         if (!(reset_mask & (RADEON_RESET_GFX |
4334                             RADEON_RESET_COMPUTE |
4335                             RADEON_RESET_CP))) {
4336                 radeon_ring_lockup_update(ring);
4337                 return false;
4338         }
4339         /* force CP activities */
4340         radeon_ring_force_activity(rdev, ring);
4341         return radeon_ring_test_lockup(rdev, ring);
4342 }
4343
4344 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC traffic while the aperture registers are changed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper 16 bits, start in the lower 16,
	 * both in 16MB units (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture is not used on CIK */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
4400
4401 /**
4402  * cik_mc_init - initialize the memory controller driver params
4403  *
4404  * @rdev: radeon_device pointer
4405  *
4406  * Look up the amount of vram, vram width, and decide how to place
4407  * vram and gart within the GPU's physical address space (CIK).
4408  * Returns 0 for success.
4409  */
4410 static int cik_mc_init(struct radeon_device *rdev)
4411 {
4412         u32 tmp;
4413         int chansize, numchan;
4414
4415         /* Get VRAM informations */
4416         rdev->mc.vram_is_ddr = true;
4417         tmp = RREG32(MC_ARB_RAMCFG);
4418         if (tmp & CHANSIZE_MASK) {
4419                 chansize = 64;
4420         } else {
4421                 chansize = 32;
4422         }
4423         tmp = RREG32(MC_SHARED_CHMAP);
4424         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4425         case 0:
4426         default:
4427                 numchan = 1;
4428                 break;
4429         case 1:
4430                 numchan = 2;
4431                 break;
4432         case 2:
4433                 numchan = 4;
4434                 break;
4435         case 3:
4436                 numchan = 8;
4437                 break;
4438         case 4:
4439                 numchan = 3;
4440                 break;
4441         case 5:
4442                 numchan = 6;
4443                 break;
4444         case 6:
4445                 numchan = 10;
4446                 break;
4447         case 7:
4448                 numchan = 12;
4449                 break;
4450         case 8:
4451                 numchan = 16;
4452                 break;
4453         }
4454         rdev->mc.vram_width = numchan * chansize;
4455         /* Could aper size report 0 ? */
4456         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4457         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4458         /* size in MB on si */
4459         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4460         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4461         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4462         si_vram_gtt_location(rdev, &rdev->mc);
4463         radeon_update_bandwidth_info(rdev);
4464
4465         return 0;
4466 }
4467
4468 /*
4469  * GART
4470  * VMID 0 is the physical GPU addresses as used by the kernel.
4471  * VMIDs 1-15 are used for userspace clients and are handled
4472  * by the radeon vm/hsa code.
4473  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 is set here,
	 * i.e. invalidate VM context 0 (the kernel's page table) */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
4489
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* re-write any ptes registered before the table was pinned */
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's GART mapping */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* out-of-range accesses in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have their base addr regs in two banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the SH_MEM/SDMA aperture regs for each of the 16 VMIDs */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore VMID 0 selection before releasing the srbm mutex */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4625
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control; mirrors the enable path but with
	 * ENABLE_L1_TLB deliberately left clear */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache; mirrors the enable path but with
	 * ENABLE_L2_CACHE deliberately left clear */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* the page table no longer needs to stay resident in vram */
	radeon_gart_table_vram_unpin(rdev);
}
4653
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the hw page
 * tables, frees the page table's vram backing store, and releases
 * the common gart state.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4667
4668 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4681
4682 /*
4683  * vm
4684  * VMID 0 is the physical GPU addresses as used by the kernel.
4685  * VMIDs 1-15 are used for userspace clients and are handled
4686  * by the radeon vm/hsa code.
4687  */
4688 /**
4689  * cik_vm_init - cik vm init callback
4690  *
4691  * @rdev: radeon_device pointer
4692  *
4693  * Inits cik specific vm parameters (number of VMs, base of vram for
4694  * VMIDs 1-15) (CIK).
4695  * Returns 0 for success.
4696  */
4697 int cik_vm_init(struct radeon_device *rdev)
4698 {
4699         /* number of VMs */
4700         rdev->vm_manager.nvm = 16;
4701         /* base offset of vram pages */
4702         if (rdev->flags & RADEON_IS_IGP) {
4703                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4704                 tmp <<= 22;
4705                 rdev->vm_manager.vram_base_offset = tmp;
4706         } else
4707                 rdev->vm_manager.vram_base_offset = 0;
4708
4709         return 0;
4710 }
4711
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: cik_vm_init() only fills in driver-side
 * parameters, so there is no state to undo here.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
4722
4723 /**
4724  * cik_vm_decode_fault - print human readable fault info
4725  *
4726  * @rdev: radeon_device pointer
4727  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4728  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4729  *
4730  * Print human readable fault information (CIK).
4731  */
4732 static void cik_vm_decode_fault(struct radeon_device *rdev,
4733                                 u32 status, u32 addr, u32 mc_client)
4734 {
4735         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4736         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4737         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4738         char *block = (char *)&mc_client;
4739
4740         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4741                protections, vmid, addr,
4742                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4743                block, mc_id);
4744 }
4745
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: vm to flush; NULL means nothing is emitted
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update the page table base address for this vm id; ids 0-7
	 * and 8-15 have their registers in two separate banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM register access back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
4827
/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK), depending on
 * which ring the asic config designates for page table updates.
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords plus 2 dwords per PTE, capped at
			 * the maximum WRITE_DATA packet size */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the gart */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4883
4884 /*
4885  * RLC
4886  * The RLC is a multi-purpose microengine that handles a
4887  * variety of functions, the most important of which is
4888  * the interrupt controller.
4889  */
4890 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4891                                           bool enable)
4892 {
4893         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4894
4895         if (enable)
4896                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4897         else
4898                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4899         WREG32(CP_INT_CNTL_RING0, tmp);
4900 }
4901
4902 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4903 {
4904         u32 tmp;
4905
4906         tmp = RREG32(RLC_LB_CNTL);
4907         if (enable)
4908                 tmp |= LOAD_BALANCE_ENABLE;
4909         else
4910                 tmp &= ~LOAD_BALANCE_ENABLE;
4911         WREG32(RLC_LB_CNTL, tmp);
4912 }
4913
/* Poll (with usec timeout) until the RLC serdes masters report idle:
 * first the per-CU masters on every shader engine / shader array,
 * then the non-CU masters.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll each SE/SH combination individually */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4938
4939 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4940 {
4941         u32 tmp;
4942
4943         tmp = RREG32(RLC_CNTL);
4944         if (tmp != rlc)
4945                 WREG32(RLC_CNTL, rlc);
4946 }
4947
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later (e.g. via cik_update_rlc()).
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (usec timeout) for the RLC GPM busy bit to clear */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4971
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through RLC_GPR_REG2, then waits (up to
 * rdev->usec_timeout each) first for the GFX power/clock status bits to
 * assert and then for the RLC to acknowledge by clearing REQ.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for GFX power and clock status */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
4992
4993 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4994 {
4995         u32 tmp;
4996
4997         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4998         WREG32(RLC_GPR_REG2, tmp);
4999 }
5000
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): disable it, mask the GUI idle
 * interrupt, and wait for the serdes units to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
5016
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK), re-enable the GUI idle
 * interrupt, and give the RLC a moment to come up.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* let the RLC settle before the caller continues */
	udelay(50);
}
5032
5033 /**
5034  * cik_rlc_resume - setup the RLC hw
5035  *
5036  * @rdev: radeon_device pointer
5037  *
5038  * Initialize the RLC registers, load the ucode,
5039  * and start the RLC (CIK).
5040  * Returns 0 for success, -EINVAL if the ucode is not available.
5041  */
5042 static int cik_rlc_resume(struct radeon_device *rdev)
5043 {
5044         u32 i, size, tmp;
5045         const __be32 *fw_data;
5046
5047         if (!rdev->rlc_fw)
5048                 return -EINVAL;
5049
5050         switch (rdev->family) {
5051         case CHIP_BONAIRE:
5052         default:
5053                 size = BONAIRE_RLC_UCODE_SIZE;
5054                 break;
5055         case CHIP_KAVERI:
5056                 size = KV_RLC_UCODE_SIZE;
5057                 break;
5058         case CHIP_KABINI:
5059                 size = KB_RLC_UCODE_SIZE;
5060                 break;
5061         }
5062
5063         cik_rlc_stop(rdev);
5064
5065         /* disable CG */
5066         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5067         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5068
5069         si_rlc_reset(rdev);
5070
5071         cik_init_pg(rdev);
5072
5073         cik_init_cg(rdev);
5074
5075         WREG32(RLC_LB_CNTR_INIT, 0);
5076         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5077
5078         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5079         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5080         WREG32(RLC_LB_PARAMS, 0x00600408);
5081         WREG32(RLC_LB_CNTL, 0x80000004);
5082
5083         WREG32(RLC_MC_CNTL, 0);
5084         WREG32(RLC_UCODE_CNTL, 0);
5085
5086         fw_data = (const __be32 *)rdev->rlc_fw->data;
5087                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5088         for (i = 0; i < size; i++)
5089                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5090         WREG32(RLC_GPM_UCODE_ADDR, 0);
5091
5092         /* XXX - find out what chips support lbpw */
5093         cik_enable_lbpw(rdev, false);
5094
5095         if (rdev->family == CHIP_BONAIRE)
5096                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5097
5098         cik_rlc_start(rdev);
5099
5100         return 0;
5101 }
5102
/* Enable/disable coarse grain clock gating (CGCG) and clock gating
 * light sleep (CGLS) for the GFX block.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* program the serdes override while the RLC is halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): purpose of the repeated dummy reads is
		 * assumed to be a flush/settle delay - confirm against
		 * hw docs before touching.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5138
/* Enable/disable medium grain clock gating (MGCG) plus the related
 * memory light sleep and CGTS features for the GFX block.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* enable CP memory light sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* program the serdes override while the RLC is halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* configure CGTS_SM_CTRL_REG (monitor mode and overrides) */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes override while the RLC is halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5217
/* MC (memory controller) registers that carry the clock gating and
 * light sleep enable bits toggled by cik_enable_mc_mgcg()/cik_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5230
5231 static void cik_enable_mc_ls(struct radeon_device *rdev,
5232                              bool enable)
5233 {
5234         int i;
5235         u32 orig, data;
5236
5237         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5238                 orig = data = RREG32(mc_cg_registers[i]);
5239                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5240                         data |= MC_LS_ENABLE;
5241                 else
5242                         data &= ~MC_LS_ENABLE;
5243                 if (data != orig)
5244                         WREG32(mc_cg_registers[i], data);
5245         }
5246 }
5247
5248 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5249                                bool enable)
5250 {
5251         int i;
5252         u32 orig, data;
5253
5254         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5255                 orig = data = RREG32(mc_cg_registers[i]);
5256                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5257                         data |= MC_CG_ENABLE;
5258                 else
5259                         data &= ~MC_CG_ENABLE;
5260                 if (data != orig)
5261                         WREG32(mc_cg_registers[i], data);
5262         }
5263 }
5264
/* Enable/disable medium grain clock gating for both SDMA engines. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* NOTE(review): the enable path blind-writes the whole
		 * register with 0x00000100 while the disable path does
		 * read-modify-write; assumed intentional - confirm
		 * against the register docs.
		 */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* set the clock gating override bits (top byte) */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
5285
5286 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5287                                  bool enable)
5288 {
5289         u32 orig, data;
5290
5291         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5292                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5293                 data |= 0x100;
5294                 if (orig != data)
5295                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5296
5297                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5298                 data |= 0x100;
5299                 if (orig != data)
5300                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5301         } else {
5302                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5303                 data &= ~0x100;
5304                 if (orig != data)
5305                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5306
5307                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5308                 data &= ~0x100;
5309                 if (orig != data)
5310                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5311         }
5312 }
5313
/* Enable/disable medium grain clock gating for UVD. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded and the
		 * register is written with 0xfff outright, while the
		 * disable path below preserves the upper bits
		 * (data &= ~0xfff).  Possibly this was meant to be
		 * "data |= 0xfff;" - confirm before changing, since it
		 * alters what gets written to the hw.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory gating bits and disable dynamic clocking */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
5339
5340 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5341                                bool enable)
5342 {
5343         u32 orig, data;
5344
5345         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5346
5347         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5348                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5349                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5350         else
5351                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5352                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5353
5354         if (orig != data)
5355                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5356 }
5357
5358 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5359                                 bool enable)
5360 {
5361         u32 orig, data;
5362
5363         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5364
5365         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5366                 data &= ~CLOCK_GATING_DIS;
5367         else
5368                 data |= CLOCK_GATING_DIS;
5369
5370         if (orig != data)
5371                 WREG32(HDP_HOST_PATH_CNTL, data);
5372 }
5373
5374 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5375                               bool enable)
5376 {
5377         u32 orig, data;
5378
5379         orig = data = RREG32(HDP_MEM_POWER_LS);
5380
5381         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5382                 data |= HDP_LS_ENABLE;
5383         else
5384                 data &= ~HDP_LS_ENABLE;
5385
5386         if (orig != data)
5387                 WREG32(HDP_MEM_POWER_LS, data);
5388 }
5389
/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* values to operate on
 * @enable: enable (true) or disable (false) clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* skipped on IGP/APU parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5433
/* Enable clock gating: GFX first, then UVD internal CG (if present),
 * then the remaining blocks.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5448
/* Disable all clock gating: non-GFX blocks first, then GFX (reverse
 * order of cik_init_cg()).
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5459
5460 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5461                                           bool enable)
5462 {
5463         u32 data, orig;
5464
5465         orig = data = RREG32(RLC_PG_CNTL);
5466         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5467                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5468         else
5469                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5470         if (orig != data)
5471                 WREG32(RLC_PG_CNTL, data);
5472 }
5473
5474 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5475                                           bool enable)
5476 {
5477         u32 data, orig;
5478
5479         orig = data = RREG32(RLC_PG_CNTL);
5480         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5481                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5482         else
5483                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5484         if (orig != data)
5485                 WREG32(RLC_PG_CNTL, data);
5486 }
5487
5488 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5489 {
5490         u32 data, orig;
5491
5492         orig = data = RREG32(RLC_PG_CNTL);
5493         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5494                 data &= ~DISABLE_CP_PG;
5495         else
5496                 data |= DISABLE_CP_PG;
5497         if (orig != data)
5498                 WREG32(RLC_PG_CNTL, data);
5499 }
5500
5501 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5502 {
5503         u32 data, orig;
5504
5505         orig = data = RREG32(RLC_PG_CNTL);
5506         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5507                 data &= ~DISABLE_GDS_PG;
5508         else
5509                 data |= DISABLE_GDS_PG;
5510         if (orig != data)
5511                 WREG32(RLC_PG_CNTL, data);
5512 }
5513
/* size and per-ME offsets (in dwords) of the power gating tables
 * embedded in the CP ucode images, used by cik_init_cp_pg_table()
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
5517
/**
 * cik_init_cp_pg_table - copy the CP power gating tables to the RLC bo
 *
 * @rdev: radeon_device pointer
 *
 * Copies CP_ME_TABLE_SIZE dwords out of each CP ucode image (ce, pfp,
 * me, then mec - twice on Kaveri) into the RLC cp_table buffer,
 * converting from the big-endian fw layout.  Bails out silently if the
 * buffer has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	/* Kaveri carries a 5th table, also taken from the MEC ucode */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5555
/* Enable/disable GFX power gating (RLC_PG_CNTL) together with
 * automatic power gating (RLC_AUTO_PG_CTRL).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the result of this read is discarded -
		 * presumably a dummy read to make sure the block is
		 * powered up before returning; confirm against hw docs.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5585
/* Return a bitmap of the active CUs for the given SE/SH pair: the two
 * shader-array config registers are merged, their upper 16 bits
 * (disable bits) shifted down, inverted, and masked to max_cu_per_sh
 * bits, so a set bit means the CU is usable.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	/* restore broadcast addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* keep only the disable bits and merge both registers */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with one bit per possible CU in this SH */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* invert: disabled bits -> active bits */
	return (~tmp) & mask;
}
5608
5609 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5610 {
5611         u32 i, j, k, active_cu_number = 0;
5612         u32 mask, counter, cu_bitmap;
5613         u32 tmp = 0;
5614
5615         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5616                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5617                         mask = 1;
5618                         cu_bitmap = 0;
5619                         counter = 0;
5620                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5621                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5622                                         if (counter < 2)
5623                                                 cu_bitmap |= mask;
5624                                         counter ++;
5625                                 }
5626                                 mask <<= 1;
5627                         }
5628
5629                         active_cu_number += counter;
5630                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5631                 }
5632         }
5633
5634         WREG32(RLC_PG_AO_CU_MASK, tmp);
5635
5636         tmp = RREG32(RLC_MAX_PG_CU);
5637         tmp &= ~MAX_PU_CU_MASK;
5638         tmp |= MAX_PU_CU(active_cu_number);
5639         WREG32(RLC_MAX_PG_CU, tmp);
5640 }
5641
5642 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5643                                        bool enable)
5644 {
5645         u32 data, orig;
5646
5647         orig = data = RREG32(RLC_PG_CNTL);
5648         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5649                 data |= STATIC_PER_CU_PG_ENABLE;
5650         else
5651                 data &= ~STATIC_PER_CU_PG_ENABLE;
5652         if (orig != data)
5653                 WREG32(RLC_PG_CNTL, data);
5654 }
5655
5656 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5657                                         bool enable)
5658 {
5659         u32 data, orig;
5660
5661         orig = data = RREG32(RLC_PG_CNTL);
5662         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5663                 data |= DYN_PER_CU_PG_ENABLE;
5664         else
5665                 data &= ~DYN_PER_CU_PG_ENABLE;
5666         if (orig != data)
5667                 WREG32(RLC_PG_CNTL, data);
5668 }
5669
/* dword offsets into the RLC GPM scratch space, used by
 * cik_init_gfx_cgpg()
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5672
/* One-time setup for GFX power gating: publish the clear state
 * descriptor and the save/restore register list to the RLC scratch
 * space and program the related base addresses and timing parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* base addresses of the save/restore and cp table buffers (256B units) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* idle poll count for CP_RB_WPTR polling */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5721
/* Toggle all GFX power gating features (CGPG plus static and dynamic
 * per-CU MGPG) in one call.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5728
5729 u32 cik_get_csb_size(struct radeon_device *rdev)
5730 {
5731         u32 count = 0;
5732         const struct cs_section_def *sect = NULL;
5733         const struct cs_extent_def *ext = NULL;
5734
5735         if (rdev->rlc.cs_data == NULL)
5736                 return 0;
5737
5738         /* begin clear state */
5739         count += 2;
5740         /* context control state */
5741         count += 3;
5742
5743         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5744                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5745                         if (sect->id == SECT_CONTEXT)
5746                                 count += 2 + ext->reg_count;
5747                         else
5748                                 return 0;
5749                 }
5750         }
5751         /* pa_sc_raster_config/pa_sc_raster_config1 */
5752         count += 4;
5753         /* end clear state */
5754         count += 2;
5755         /* clear state */
5756         count += 2;
5757
5758         return count;
5759 }
5760
/**
 * cik_get_csb_buffer - fill a buffer with the clear state PM4 stream
 *
 * @rdev: radeon_device pointer
 * @buffer: destination dword buffer; must hold cik_get_csb_size() dwords
 *
 * Emits the preamble begin, context control, all SECT_CONTEXT register
 * extents, the asic-specific raster config pair, the preamble end and a
 * CLEAR_STATE packet.  Returns early if cs_data or buffer is NULL, or
 * if a non-SECT_CONTEXT section is encountered.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* one SET_CONTEXT_REG packet per register extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 (asic specific) */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = 0x16000012;
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KAVERI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	case CHIP_KABINI:
		buffer[count++] = 0x00000000; /* XXX */
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5819
/* Enable the power gating features selected by rdev->pg_flags. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
5834
5835 static void cik_fini_pg(struct radeon_device *rdev)
5836 {
5837         if (rdev->pg_flags) {
5838                 cik_update_gfx_pg(rdev, false);
5839                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5840                         cik_enable_cp_pg(rdev, false);
5841                         cik_enable_gds_pg(rdev, false);
5842                 }
5843         }
5844 }
5845
5846 /*
5847  * Interrupts
5848  * Starting with r6xx, interrupts are handled via a ring buffer.
5849  * Ring buffers are areas of GPU accessible memory that the GPU
5850  * writes interrupt vectors into and the host reads vectors out of.
5851  * There is a rptr (read pointer) that determines where the
5852  * host is currently reading, and a wptr (write pointer)
5853  * which determines where the GPU has written.  When the
5854  * pointers are equal, the ring is idle.  When the GPU
5855  * writes vectors to the ring buffer, it increments the
5856  * wptr.  When there is an interrupt, the host then starts
5857  * fetching commands and processing them until the pointers are
5858  * equal again at which point it updates the rptr.
5859  */
5860
5861 /**
5862  * cik_enable_interrupts - Enable the interrupt ring buffer
5863  *
5864  * @rdev: radeon_device pointer
5865  *
5866  * Enable the interrupt ring buffer (CIK).
5867  */
5868 static void cik_enable_interrupts(struct radeon_device *rdev)
5869 {
5870         u32 ih_cntl = RREG32(IH_CNTL);
5871         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5872
5873         ih_cntl |= ENABLE_INTR;
5874         ih_rb_cntl |= IH_RB_ENABLE;
5875         WREG32(IH_CNTL, ih_cntl);
5876         WREG32(IH_RB_CNTL, ih_rb_cntl);
5877         rdev->ih.enabled = true;
5878 }
5879
5880 /**
5881  * cik_disable_interrupts - Disable the interrupt ring buffer
5882  *
5883  * @rdev: radeon_device pointer
5884  *
5885  * Disable the interrupt ring buffer (CIK).
5886  */
5887 static void cik_disable_interrupts(struct radeon_device *rdev)
5888 {
5889         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5890         u32 ih_cntl = RREG32(IH_CNTL);
5891
5892         ih_rb_cntl &= ~IH_RB_ENABLE;
5893         ih_cntl &= ~ENABLE_INTR;
5894         WREG32(IH_RB_CNTL, ih_rb_cntl);
5895         WREG32(IH_CNTL, ih_cntl);
5896         /* set rptr, wptr to 0 */
5897         WREG32(IH_RB_RPTR, 0);
5898         WREG32(IH_RB_WPTR, 0);
5899         rdev->ih.enabled = false;
5900         rdev->ih.rptr = 0;
5901 }
5902
5903 /**
5904  * cik_disable_interrupt_state - Disable all interrupt sources
5905  *
5906  * @rdev: radeon_device pointer
5907  *
5908  * Clear all interrupt enable bits used by the driver (CIK).
5909  */
5910 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5911 {
5912         u32 tmp;
5913
5914         /* gfx ring */
5915         tmp = RREG32(CP_INT_CNTL_RING0) &
5916                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5917         WREG32(CP_INT_CNTL_RING0, tmp);
5918         /* sdma */
5919         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5920         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5921         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5922         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5923         /* compute queues */
5924         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5925         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5926         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5927         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5928         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5929         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5930         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5931         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5932         /* grbm */
5933         WREG32(GRBM_INT_CNTL, 0);
5934         /* vline/vblank, etc. */
5935         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5936         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5937         if (rdev->num_crtc >= 4) {
5938                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5939                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5940         }
5941         if (rdev->num_crtc >= 6) {
5942                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5943                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5944         }
5945
5946         /* dac hotplug */
5947         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5948
5949         /* digital hotplug */
5950         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5951         WREG32(DC_HPD1_INT_CONTROL, tmp);
5952         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5953         WREG32(DC_HPD2_INT_CONTROL, tmp);
5954         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5955         WREG32(DC_HPD3_INT_CONTROL, tmp);
5956         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5957         WREG32(DC_HPD4_INT_CONTROL, tmp);
5958         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5959         WREG32(DC_HPD5_INT_CONTROL, tmp);
5960         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5961         WREG32(DC_HPD6_INT_CONTROL, tmp);
5962
5963 }
5964
5965 /**
5966  * cik_irq_init - init and enable the interrupt ring
5967  *
5968  * @rdev: radeon_device pointer
5969  *
5970  * Allocate a ring buffer for the interrupt controller,
5971  * enable the RLC, disable interrupts, enable the IH
5972  * ring buffer and enable it (CIK).
5973  * Called at device load and reume.
5974  * Returns 0 for success, errors for failure.
5975  */
5976 static int cik_irq_init(struct radeon_device *rdev)
5977 {
5978         int ret = 0;
5979         int rb_bufsz;
5980         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5981
5982         /* allocate ring */
5983         ret = r600_ih_ring_alloc(rdev);
5984         if (ret)
5985                 return ret;
5986
5987         /* disable irqs */
5988         cik_disable_interrupts(rdev);
5989
5990         /* init rlc */
5991         ret = cik_rlc_resume(rdev);
5992         if (ret) {
5993                 r600_ih_ring_fini(rdev);
5994                 return ret;
5995         }
5996
5997         /* setup interrupt control */
5998         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5999         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6000         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6001         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6002          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6003          */
6004         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6005         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6006         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6007         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6008
6009         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6010         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6011
6012         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6013                       IH_WPTR_OVERFLOW_CLEAR |
6014                       (rb_bufsz << 1));
6015
6016         if (rdev->wb.enabled)
6017                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6018
6019         /* set the writeback address whether it's enabled or not */
6020         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6021         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6022
6023         WREG32(IH_RB_CNTL, ih_rb_cntl);
6024
6025         /* set rptr, wptr to 0 */
6026         WREG32(IH_RB_RPTR, 0);
6027         WREG32(IH_RB_WPTR, 0);
6028
6029         /* Default settings for IH_CNTL (disabled at first) */
6030         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6031         /* RPTR_REARM only works if msi's are enabled */
6032         if (rdev->msi_enabled)
6033                 ih_cntl |= RPTR_REARM;
6034         WREG32(IH_CNTL, ih_cntl);
6035
6036         /* force the active interrupt state to all disabled */
6037         cik_disable_interrupt_state(rdev);
6038
6039         pci_set_master(rdev->pdev);
6040
6041         /* enable irqs */
6042         cik_enable_interrupts(rdev);
6043
6044         return ret;
6045 }
6046
6047 /**
6048  * cik_irq_set - enable/disable interrupt sources
6049  *
6050  * @rdev: radeon_device pointer
6051  *
6052  * Enable interrupt sources on the GPU (vblanks, hpd,
6053  * etc.) (CIK).
6054  * Returns 0 for success, errors for failure.
6055  */
6056 int cik_irq_set(struct radeon_device *rdev)
6057 {
6058         u32 cp_int_cntl;
6059         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6060         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6061         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6062         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6063         u32 grbm_int_cntl = 0;
6064         u32 dma_cntl, dma_cntl1;
6065         u32 thermal_int;
6066
6067         if (!rdev->irq.installed) {
6068                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6069                 return -EINVAL;
6070         }
6071         /* don't enable anything if the ih is disabled */
6072         if (!rdev->ih.enabled) {
6073                 cik_disable_interrupts(rdev);
6074                 /* force the active interrupt state to all disabled */
6075                 cik_disable_interrupt_state(rdev);
6076                 return 0;
6077         }
6078
6079         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6080                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6081         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6082
6083         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6084         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6085         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6086         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6087         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6088         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089
6090         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6091         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6092
6093         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6094         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6095         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6096         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6097         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6098         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101
6102         if (rdev->flags & RADEON_IS_IGP)
6103                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6104                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6105         else
6106                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6107                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6108
6109         /* enable CP interrupts on all rings */
6110         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6111                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6112                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6113         }
6114         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6115                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6116                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6117                 if (ring->me == 1) {
6118                         switch (ring->pipe) {
6119                         case 0:
6120                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6121                                 break;
6122                         case 1:
6123                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6124                                 break;
6125                         case 2:
6126                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6127                                 break;
6128                         case 3:
6129                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6130                                 break;
6131                         default:
6132                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6133                                 break;
6134                         }
6135                 } else if (ring->me == 2) {
6136                         switch (ring->pipe) {
6137                         case 0:
6138                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6139                                 break;
6140                         case 1:
6141                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6142                                 break;
6143                         case 2:
6144                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6145                                 break;
6146                         case 3:
6147                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6148                                 break;
6149                         default:
6150                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6151                                 break;
6152                         }
6153                 } else {
6154                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6155                 }
6156         }
6157         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6158                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6159                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6160                 if (ring->me == 1) {
6161                         switch (ring->pipe) {
6162                         case 0:
6163                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6164                                 break;
6165                         case 1:
6166                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6167                                 break;
6168                         case 2:
6169                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6170                                 break;
6171                         case 3:
6172                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6173                                 break;
6174                         default:
6175                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6176                                 break;
6177                         }
6178                 } else if (ring->me == 2) {
6179                         switch (ring->pipe) {
6180                         case 0:
6181                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6182                                 break;
6183                         case 1:
6184                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6185                                 break;
6186                         case 2:
6187                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6188                                 break;
6189                         case 3:
6190                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6191                                 break;
6192                         default:
6193                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6194                                 break;
6195                         }
6196                 } else {
6197                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6198                 }
6199         }
6200
6201         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6202                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6203                 dma_cntl |= TRAP_ENABLE;
6204         }
6205
6206         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6207                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6208                 dma_cntl1 |= TRAP_ENABLE;
6209         }
6210
6211         if (rdev->irq.crtc_vblank_int[0] ||
6212             atomic_read(&rdev->irq.pflip[0])) {
6213                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6214                 crtc1 |= VBLANK_INTERRUPT_MASK;
6215         }
6216         if (rdev->irq.crtc_vblank_int[1] ||
6217             atomic_read(&rdev->irq.pflip[1])) {
6218                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6219                 crtc2 |= VBLANK_INTERRUPT_MASK;
6220         }
6221         if (rdev->irq.crtc_vblank_int[2] ||
6222             atomic_read(&rdev->irq.pflip[2])) {
6223                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6224                 crtc3 |= VBLANK_INTERRUPT_MASK;
6225         }
6226         if (rdev->irq.crtc_vblank_int[3] ||
6227             atomic_read(&rdev->irq.pflip[3])) {
6228                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6229                 crtc4 |= VBLANK_INTERRUPT_MASK;
6230         }
6231         if (rdev->irq.crtc_vblank_int[4] ||
6232             atomic_read(&rdev->irq.pflip[4])) {
6233                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6234                 crtc5 |= VBLANK_INTERRUPT_MASK;
6235         }
6236         if (rdev->irq.crtc_vblank_int[5] ||
6237             atomic_read(&rdev->irq.pflip[5])) {
6238                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6239                 crtc6 |= VBLANK_INTERRUPT_MASK;
6240         }
6241         if (rdev->irq.hpd[0]) {
6242                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6243                 hpd1 |= DC_HPDx_INT_EN;
6244         }
6245         if (rdev->irq.hpd[1]) {
6246                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6247                 hpd2 |= DC_HPDx_INT_EN;
6248         }
6249         if (rdev->irq.hpd[2]) {
6250                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6251                 hpd3 |= DC_HPDx_INT_EN;
6252         }
6253         if (rdev->irq.hpd[3]) {
6254                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6255                 hpd4 |= DC_HPDx_INT_EN;
6256         }
6257         if (rdev->irq.hpd[4]) {
6258                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6259                 hpd5 |= DC_HPDx_INT_EN;
6260         }
6261         if (rdev->irq.hpd[5]) {
6262                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6263                 hpd6 |= DC_HPDx_INT_EN;
6264         }
6265
6266         if (rdev->irq.dpm_thermal) {
6267                 DRM_DEBUG("dpm thermal\n");
6268                 if (rdev->flags & RADEON_IS_IGP)
6269                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6270                 else
6271                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6272         }
6273
6274         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6275
6276         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6277         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6278
6279         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6280         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6281         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6282         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6283         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6284         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6285         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6286         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6287
6288         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6289
6290         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6291         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6292         if (rdev->num_crtc >= 4) {
6293                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6294                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6295         }
6296         if (rdev->num_crtc >= 6) {
6297                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6298                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6299         }
6300
6301         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6302         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6303         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6304         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6305         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6306         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6307
6308         if (rdev->flags & RADEON_IS_IGP)
6309                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6310         else
6311                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6312
6313         return 0;
6314 }
6315
6316 /**
6317  * cik_irq_ack - ack interrupt sources
6318  *
6319  * @rdev: radeon_device pointer
6320  *
6321  * Ack interrupt sources on the GPU (vblanks, hpd,
6322  * etc.) (CIK).  Certain interrupts sources are sw
6323  * generated and do not require an explicit ack.
6324  */
6325 static inline void cik_irq_ack(struct radeon_device *rdev)
6326 {
6327         u32 tmp;
6328
6329         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6330         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6331         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6332         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6333         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6334         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6335         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6336
6337         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6338                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6339         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6340                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6341         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6342                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6343         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6344                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6345
6346         if (rdev->num_crtc >= 4) {
6347                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6348                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6349                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6350                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6351                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6352                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6353                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6354                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6355         }
6356
6357         if (rdev->num_crtc >= 6) {
6358                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6359                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6360                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6361                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6362                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6363                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6364                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6365                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6366         }
6367
6368         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6369                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6370                 tmp |= DC_HPDx_INT_ACK;
6371                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6372         }
6373         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6374                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6375                 tmp |= DC_HPDx_INT_ACK;
6376                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6377         }
6378         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6379                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6380                 tmp |= DC_HPDx_INT_ACK;
6381                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6382         }
6383         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6384                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6385                 tmp |= DC_HPDx_INT_ACK;
6386                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6387         }
6388         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6389                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6390                 tmp |= DC_HPDx_INT_ACK;
6391                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6392         }
6393         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6394                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6395                 tmp |= DC_HPDx_INT_ACK;
6396                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6397         }
6398 }
6399
6400 /**
6401  * cik_irq_disable - disable interrupts
6402  *
6403  * @rdev: radeon_device pointer
6404  *
6405  * Disable interrupts on the hw (CIK).
6406  */
6407 static void cik_irq_disable(struct radeon_device *rdev)
6408 {
6409         cik_disable_interrupts(rdev);
6410         /* Wait and acknowledge irq */
6411         mdelay(1);
6412         cik_irq_ack(rdev);
6413         cik_disable_interrupt_state(rdev);
6414 }
6415
6416 /**
6417  * cik_irq_disable - disable interrupts for suspend
6418  *
6419  * @rdev: radeon_device pointer
6420  *
6421  * Disable interrupts and stop the RLC (CIK).
6422  * Used for suspend.
6423  */
6424 static void cik_irq_suspend(struct radeon_device *rdev)
6425 {
6426         cik_irq_disable(rdev);
6427         cik_rlc_stop(rdev);
6428 }
6429
6430 /**
6431  * cik_irq_fini - tear down interrupt support
6432  *
6433  * @rdev: radeon_device pointer
6434  *
6435  * Disable interrupts on the hw and free the IH ring
6436  * buffer (CIK).
6437  * Used for driver unload.
6438  */
6439 static void cik_irq_fini(struct radeon_device *rdev)
6440 {
6441         cik_irq_suspend(rdev);
6442         r600_ih_ring_fini(rdev);
6443 }
6444
6445 /**
6446  * cik_get_ih_wptr - get the IH ring buffer wptr
6447  *
6448  * @rdev: radeon_device pointer
6449  *
6450  * Get the IH ring buffer wptr from either the register
6451  * or the writeback memory buffer (CIK).  Also check for
6452  * ring buffer overflow and deal with it.
6453  * Used by cik_irq_process().
6454  * Returns the value of the wptr.
6455  */
6456 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6457 {
6458         u32 wptr, tmp;
6459
6460         if (rdev->wb.enabled)
6461                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6462         else
6463                 wptr = RREG32(IH_RB_WPTR);
6464
6465         if (wptr & RB_OVERFLOW) {
6466                 /* When a ring buffer overflow happen start parsing interrupt
6467                  * from the last not overwritten vector (wptr + 16). Hopefully
6468                  * this should allow us to catchup.
6469                  */
6470                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6471                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6472                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6473                 tmp = RREG32(IH_RB_CNTL);
6474                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6475                 WREG32(IH_RB_CNTL, tmp);
6476         }
6477         return (wptr & rdev->ih.ptr_mask);
6478 }
6479
6480 /*        CIK IV Ring
6481  * Each IV ring entry is 128 bits:
6482  * [7:0]    - interrupt source id
6483  * [31:8]   - reserved
6484  * [59:32]  - interrupt source data
6485  * [63:60]  - reserved
6486  * [71:64]  - RINGID
6487  *            CP:
6488  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6489  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6490  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6491  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6492  *            PIPE_ID - ME0 0=3D
6493  *                    - ME1&2 compute dispatcher (4 pipes each)
6494  *            SDMA:
6495  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6496  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6497  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6498  * [79:72]  - VMID
6499  * [95:80]  - PASID
6500  * [127:96] - reserved
6501  */
6502 /**
6503  * cik_irq_process - interrupt handler
6504  *
6505  * @rdev: radeon_device pointer
6506  *
6507  * Interrupt hander (CIK).  Walk the IH ring,
6508  * ack interrupts and schedule work to handle
6509  * interrupt events.
6510  * Returns irq process return code.
6511  */
6512 int cik_irq_process(struct radeon_device *rdev)
6513 {
6514         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6515         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6516         u32 wptr;
6517         u32 rptr;
6518         u32 src_id, src_data, ring_id;
6519         u8 me_id, pipe_id, queue_id;
6520         u32 ring_index;
6521         bool queue_hotplug = false;
6522         bool queue_reset = false;
6523         u32 addr, status, mc_client;
6524         bool queue_thermal = false;
6525
6526         if (!rdev->ih.enabled || rdev->shutdown)
6527                 return IRQ_NONE;
6528
6529         wptr = cik_get_ih_wptr(rdev);
6530
6531 restart_ih:
6532         /* is somebody else already processing irqs? */
6533         if (atomic_xchg(&rdev->ih.lock, 1))
6534                 return IRQ_NONE;
6535
6536         rptr = rdev->ih.rptr;
6537         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6538
6539         /* Order reading of wptr vs. reading of IH ring data */
6540         rmb();
6541
6542         /* display interrupts */
6543         cik_irq_ack(rdev);
6544
6545         while (rptr != wptr) {
6546                 /* wptr/rptr are in bytes! */
6547                 ring_index = rptr / 4;
6548                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6549                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6550                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6551
6552                 switch (src_id) {
6553                 case 1: /* D1 vblank/vline */
6554                         switch (src_data) {
6555                         case 0: /* D1 vblank */
6556                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6557                                         if (rdev->irq.crtc_vblank_int[0]) {
6558                                                 drm_handle_vblank(rdev->ddev, 0);
6559                                                 rdev->pm.vblank_sync = true;
6560                                                 wake_up(&rdev->irq.vblank_queue);
6561                                         }
6562                                         if (atomic_read(&rdev->irq.pflip[0]))
6563                                                 radeon_crtc_handle_flip(rdev, 0);
6564                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6565                                         DRM_DEBUG("IH: D1 vblank\n");
6566                                 }
6567                                 break;
6568                         case 1: /* D1 vline */
6569                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6570                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6571                                         DRM_DEBUG("IH: D1 vline\n");
6572                                 }
6573                                 break;
6574                         default:
6575                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6576                                 break;
6577                         }
6578                         break;
6579                 case 2: /* D2 vblank/vline */
6580                         switch (src_data) {
6581                         case 0: /* D2 vblank */
6582                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6583                                         if (rdev->irq.crtc_vblank_int[1]) {
6584                                                 drm_handle_vblank(rdev->ddev, 1);
6585                                                 rdev->pm.vblank_sync = true;
6586                                                 wake_up(&rdev->irq.vblank_queue);
6587                                         }
6588                                         if (atomic_read(&rdev->irq.pflip[1]))
6589                                                 radeon_crtc_handle_flip(rdev, 1);
6590                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6591                                         DRM_DEBUG("IH: D2 vblank\n");
6592                                 }
6593                                 break;
6594                         case 1: /* D2 vline */
6595                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6596                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6597                                         DRM_DEBUG("IH: D2 vline\n");
6598                                 }
6599                                 break;
6600                         default:
6601                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6602                                 break;
6603                         }
6604                         break;
6605                 case 3: /* D3 vblank/vline */
6606                         switch (src_data) {
6607                         case 0: /* D3 vblank */
6608                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6609                                         if (rdev->irq.crtc_vblank_int[2]) {
6610                                                 drm_handle_vblank(rdev->ddev, 2);
6611                                                 rdev->pm.vblank_sync = true;
6612                                                 wake_up(&rdev->irq.vblank_queue);
6613                                         }
6614                                         if (atomic_read(&rdev->irq.pflip[2]))
6615                                                 radeon_crtc_handle_flip(rdev, 2);
6616                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6617                                         DRM_DEBUG("IH: D3 vblank\n");
6618                                 }
6619                                 break;
6620                         case 1: /* D3 vline */
6621                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6622                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6623                                         DRM_DEBUG("IH: D3 vline\n");
6624                                 }
6625                                 break;
6626                         default:
6627                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6628                                 break;
6629                         }
6630                         break;
6631                 case 4: /* D4 vblank/vline */
6632                         switch (src_data) {
6633                         case 0: /* D4 vblank */
6634                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6635                                         if (rdev->irq.crtc_vblank_int[3]) {
6636                                                 drm_handle_vblank(rdev->ddev, 3);
6637                                                 rdev->pm.vblank_sync = true;
6638                                                 wake_up(&rdev->irq.vblank_queue);
6639                                         }
6640                                         if (atomic_read(&rdev->irq.pflip[3]))
6641                                                 radeon_crtc_handle_flip(rdev, 3);
6642                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6643                                         DRM_DEBUG("IH: D4 vblank\n");
6644                                 }
6645                                 break;
6646                         case 1: /* D4 vline */
6647                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6648                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6649                                         DRM_DEBUG("IH: D4 vline\n");
6650                                 }
6651                                 break;
6652                         default:
6653                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6654                                 break;
6655                         }
6656                         break;
6657                 case 5: /* D5 vblank/vline */
6658                         switch (src_data) {
6659                         case 0: /* D5 vblank */
6660                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6661                                         if (rdev->irq.crtc_vblank_int[4]) {
6662                                                 drm_handle_vblank(rdev->ddev, 4);
6663                                                 rdev->pm.vblank_sync = true;
6664                                                 wake_up(&rdev->irq.vblank_queue);
6665                                         }
6666                                         if (atomic_read(&rdev->irq.pflip[4]))
6667                                                 radeon_crtc_handle_flip(rdev, 4);
6668                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6669                                         DRM_DEBUG("IH: D5 vblank\n");
6670                                 }
6671                                 break;
6672                         case 1: /* D5 vline */
6673                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6674                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6675                                         DRM_DEBUG("IH: D5 vline\n");
6676                                 }
6677                                 break;
6678                         default:
6679                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6680                                 break;
6681                         }
6682                         break;
6683                 case 6: /* D6 vblank/vline */
6684                         switch (src_data) {
6685                         case 0: /* D6 vblank */
6686                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6687                                         if (rdev->irq.crtc_vblank_int[5]) {
6688                                                 drm_handle_vblank(rdev->ddev, 5);
6689                                                 rdev->pm.vblank_sync = true;
6690                                                 wake_up(&rdev->irq.vblank_queue);
6691                                         }
6692                                         if (atomic_read(&rdev->irq.pflip[5]))
6693                                                 radeon_crtc_handle_flip(rdev, 5);
6694                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6695                                         DRM_DEBUG("IH: D6 vblank\n");
6696                                 }
6697                                 break;
6698                         case 1: /* D6 vline */
6699                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6700                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6701                                         DRM_DEBUG("IH: D6 vline\n");
6702                                 }
6703                                 break;
6704                         default:
6705                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6706                                 break;
6707                         }
6708                         break;
6709                 case 42: /* HPD hotplug */
6710                         switch (src_data) {
6711                         case 0:
6712                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6713                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6714                                         queue_hotplug = true;
6715                                         DRM_DEBUG("IH: HPD1\n");
6716                                 }
6717                                 break;
6718                         case 1:
6719                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6720                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6721                                         queue_hotplug = true;
6722                                         DRM_DEBUG("IH: HPD2\n");
6723                                 }
6724                                 break;
6725                         case 2:
6726                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6727                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6728                                         queue_hotplug = true;
6729                                         DRM_DEBUG("IH: HPD3\n");
6730                                 }
6731                                 break;
6732                         case 3:
6733                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6734                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6735                                         queue_hotplug = true;
6736                                         DRM_DEBUG("IH: HPD4\n");
6737                                 }
6738                                 break;
6739                         case 4:
6740                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6741                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6742                                         queue_hotplug = true;
6743                                         DRM_DEBUG("IH: HPD5\n");
6744                                 }
6745                                 break;
6746                         case 5:
6747                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6748                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6749                                         queue_hotplug = true;
6750                                         DRM_DEBUG("IH: HPD6\n");
6751                                 }
6752                                 break;
6753                         default:
6754                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6755                                 break;
6756                         }
6757                         break;
6758                 case 124: /* UVD */
6759                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6760                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6761                         break;
6762                 case 146:
6763                 case 147:
6764                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6765                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6766                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6767                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6768                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6769                                 addr);
6770                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6771                                 status);
6772                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6773                         /* reset addr and status */
6774                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6775                         break;
6776                 case 176: /* GFX RB CP_INT */
6777                 case 177: /* GFX IB CP_INT */
6778                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6779                         break;
6780                 case 181: /* CP EOP event */
6781                         DRM_DEBUG("IH: CP EOP\n");
6782                         /* XXX check the bitfield order! */
6783                         me_id = (ring_id & 0x60) >> 5;
6784                         pipe_id = (ring_id & 0x18) >> 3;
6785                         queue_id = (ring_id & 0x7) >> 0;
6786                         switch (me_id) {
6787                         case 0:
6788                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6789                                 break;
6790                         case 1:
6791                         case 2:
6792                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6793                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6794                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6795                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6796                                 break;
6797                         }
6798                         break;
6799                 case 184: /* CP Privileged reg access */
6800                         DRM_ERROR("Illegal register access in command stream\n");
6801                         /* XXX check the bitfield order! */
6802                         me_id = (ring_id & 0x60) >> 5;
6803                         pipe_id = (ring_id & 0x18) >> 3;
6804                         queue_id = (ring_id & 0x7) >> 0;
6805                         switch (me_id) {
6806                         case 0:
6807                                 /* This results in a full GPU reset, but all we need to do is soft
6808                                  * reset the CP for gfx
6809                                  */
6810                                 queue_reset = true;
6811                                 break;
6812                         case 1:
6813                                 /* XXX compute */
6814                                 queue_reset = true;
6815                                 break;
6816                         case 2:
6817                                 /* XXX compute */
6818                                 queue_reset = true;
6819                                 break;
6820                         }
6821                         break;
6822                 case 185: /* CP Privileged inst */
6823                         DRM_ERROR("Illegal instruction in command stream\n");
6824                         /* XXX check the bitfield order! */
6825                         me_id = (ring_id & 0x60) >> 5;
6826                         pipe_id = (ring_id & 0x18) >> 3;
6827                         queue_id = (ring_id & 0x7) >> 0;
6828                         switch (me_id) {
6829                         case 0:
6830                                 /* This results in a full GPU reset, but all we need to do is soft
6831                                  * reset the CP for gfx
6832                                  */
6833                                 queue_reset = true;
6834                                 break;
6835                         case 1:
6836                                 /* XXX compute */
6837                                 queue_reset = true;
6838                                 break;
6839                         case 2:
6840                                 /* XXX compute */
6841                                 queue_reset = true;
6842                                 break;
6843                         }
6844                         break;
6845                 case 224: /* SDMA trap event */
6846                         /* XXX check the bitfield order! */
6847                         me_id = (ring_id & 0x3) >> 0;
6848                         queue_id = (ring_id & 0xc) >> 2;
6849                         DRM_DEBUG("IH: SDMA trap\n");
6850                         switch (me_id) {
6851                         case 0:
6852                                 switch (queue_id) {
6853                                 case 0:
6854                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6855                                         break;
6856                                 case 1:
6857                                         /* XXX compute */
6858                                         break;
6859                                 case 2:
6860                                         /* XXX compute */
6861                                         break;
6862                                 }
6863                                 break;
6864                         case 1:
6865                                 switch (queue_id) {
6866                                 case 0:
6867                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6868                                         break;
6869                                 case 1:
6870                                         /* XXX compute */
6871                                         break;
6872                                 case 2:
6873                                         /* XXX compute */
6874                                         break;
6875                                 }
6876                                 break;
6877                         }
6878                         break;
6879                 case 230: /* thermal low to high */
6880                         DRM_DEBUG("IH: thermal low to high\n");
6881                         rdev->pm.dpm.thermal.high_to_low = false;
6882                         queue_thermal = true;
6883                         break;
6884                 case 231: /* thermal high to low */
6885                         DRM_DEBUG("IH: thermal high to low\n");
6886                         rdev->pm.dpm.thermal.high_to_low = true;
6887                         queue_thermal = true;
6888                         break;
6889                 case 233: /* GUI IDLE */
6890                         DRM_DEBUG("IH: GUI idle\n");
6891                         break;
6892                 case 241: /* SDMA Privileged inst */
6893                 case 247: /* SDMA Privileged inst */
6894                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6895                         /* XXX check the bitfield order! */
6896                         me_id = (ring_id & 0x3) >> 0;
6897                         queue_id = (ring_id & 0xc) >> 2;
6898                         switch (me_id) {
6899                         case 0:
6900                                 switch (queue_id) {
6901                                 case 0:
6902                                         queue_reset = true;
6903                                         break;
6904                                 case 1:
6905                                         /* XXX compute */
6906                                         queue_reset = true;
6907                                         break;
6908                                 case 2:
6909                                         /* XXX compute */
6910                                         queue_reset = true;
6911                                         break;
6912                                 }
6913                                 break;
6914                         case 1:
6915                                 switch (queue_id) {
6916                                 case 0:
6917                                         queue_reset = true;
6918                                         break;
6919                                 case 1:
6920                                         /* XXX compute */
6921                                         queue_reset = true;
6922                                         break;
6923                                 case 2:
6924                                         /* XXX compute */
6925                                         queue_reset = true;
6926                                         break;
6927                                 }
6928                                 break;
6929                         }
6930                         break;
6931                 default:
6932                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6933                         break;
6934                 }
6935
6936                 /* wptr/rptr are in bytes! */
6937                 rptr += 16;
6938                 rptr &= rdev->ih.ptr_mask;
6939         }
6940         if (queue_hotplug)
6941                 schedule_work(&rdev->hotplug_work);
6942         if (queue_reset)
6943                 schedule_work(&rdev->reset_work);
6944         if (queue_thermal)
6945                 schedule_work(&rdev->pm.dpm.thermal.work);
6946         rdev->ih.rptr = rptr;
6947         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6948         atomic_set(&rdev->ih.lock, 0);
6949
6950         /* make sure wptr hasn't changed while processing */
6951         wptr = cik_get_ih_wptr(rdev);
6952         if (wptr != rptr)
6953                 goto restart_ih;
6954
6955         return IRQ_HANDLED;
6956 }
6957
6958 /*
6959  * startup/shutdown callbacks
6960  */
6961 /**
6962  * cik_startup - program the asic to a functional state
6963  *
6964  * @rdev: radeon_device pointer
6965  *
6966  * Programs the asic to a functional state (CIK).
6967  * Called by cik_init() and cik_resume().
6968  * Returns 0 for success, error for failure.
6969  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller before anything touches VRAM */
	cik_mc_program(rdev);

	/* Load microcode on demand.  IGPs (APUs) have no MC firmware; dGPUs
	 * additionally require mc_fw and an explicit MC microcode load.
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the per-ASIC RLC save/restore register list;
		 * Kaveri (Spectre) differs from Kabini (Kalindi)
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence handling on each ring: gfx, two compute (CP1/CP2),
	 * two SDMA engines, and (optionally) UVD
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is best-effort: on any failure the UVD ring is disabled
	 * (ring_size = 0) rather than failing the whole startup
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only initialized when the earlier UVD resume
	 * sequence succeeded (ring_size left non-zero)
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7195
7196 /**
7197  * cik_resume - resume the asic to a functional state
7198  *
7199  * @rdev: radeon_device pointer
7200  *
7201  * Programs the asic to a functional state (CIK).
7202  * Called at resume.
7203  * Returns 0 for success, error for failure.
7204  */
7205 int cik_resume(struct radeon_device *rdev)
7206 {
7207         int r;
7208
7209         /* post card */
7210         atom_asic_init(rdev->mode_info.atom_context);
7211
7212         /* init golden registers */
7213         cik_init_golden_registers(rdev);
7214
7215         rdev->accel_working = true;
7216         r = cik_startup(rdev);
7217         if (r) {
7218                 DRM_ERROR("cik startup failed on resume\n");
7219                 rdev->accel_working = false;
7220                 return r;
7221         }
7222
7223         return r;
7224
7225 }
7226
7227 /**
7228  * cik_suspend - suspend the asic
7229  *
7230  * @rdev: radeon_device pointer
7231  *
7232  * Bring the chip into a state suitable for suspend (CIK).
7233  * Called at suspend.
7234  * Returns 0 for success.
7235  */
7236 int cik_suspend(struct radeon_device *rdev)
7237 {
7238         dce6_audio_fini(rdev);
7239         radeon_vm_manager_fini(rdev);
7240         cik_cp_enable(rdev, false);
7241         cik_sdma_enable(rdev, false);
7242         uvd_v1_0_fini(rdev);
7243         radeon_uvd_suspend(rdev);
7244         cik_fini_pg(rdev);
7245         cik_fini_cg(rdev);
7246         cik_irq_suspend(rdev);
7247         radeon_wb_disable(rdev);
7248         cik_pcie_gart_disable(rdev);
7249         return 0;
7250 }
7251
7252 /* Plan is to move initialization in that function and use
7253  * helper function so that radeon_device_init pretty much
7254  * do nothing more than calling asic specific function. This
7255  * should also allow to remove a bunch of callback function
7256  * like vram_info.
7257  */
7258 /**
7259  * cik_init - asic specific driver and hw init
7260  *
7261  * @rdev: radeon_device pointer
7262  *
7263  * Setup asic specific driver variables and program the hw
7264  * to a functional state (CIK).
7265  * Called at driver startup.
7266  * Returns 0 for success, errors for failure.
7267  */
7268 int cik_init(struct radeon_device *rdev)
7269 {
7270         struct radeon_ring *ring;
7271         int r;
7272
7273         /* Read BIOS */
7274         if (!radeon_get_bios(rdev)) {
7275                 if (ASIC_IS_AVIVO(rdev))
7276                         return -EINVAL;
7277         }
7278         /* Must be an ATOMBIOS */
7279         if (!rdev->is_atom_bios) {
7280                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7281                 return -EINVAL;
7282         }
7283         r = radeon_atombios_init(rdev);
7284         if (r)
7285                 return r;
7286
7287         /* Post card if necessary */
7288         if (!radeon_card_posted(rdev)) {
7289                 if (!rdev->bios) {
7290                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7291                         return -EINVAL;
7292                 }
7293                 DRM_INFO("GPU not posted. posting now...\n");
7294                 atom_asic_init(rdev->mode_info.atom_context);
7295         }
7296         /* init golden registers */
7297         cik_init_golden_registers(rdev);
7298         /* Initialize scratch registers */
7299         cik_scratch_init(rdev);
7300         /* Initialize surface registers */
7301         radeon_surface_init(rdev);
7302         /* Initialize clocks */
7303         radeon_get_clock_info(rdev->ddev);
7304
7305         /* Fence driver */
7306         r = radeon_fence_driver_init(rdev);
7307         if (r)
7308                 return r;
7309
7310         /* initialize memory controller */
7311         r = cik_mc_init(rdev);
7312         if (r)
7313                 return r;
7314         /* Memory manager */
7315         r = radeon_bo_init(rdev);
7316         if (r)
7317                 return r;
7318
7319         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7320         ring->ring_obj = NULL;
7321         r600_ring_init(rdev, ring, 1024 * 1024);
7322
7323         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7324         ring->ring_obj = NULL;
7325         r600_ring_init(rdev, ring, 1024 * 1024);
7326         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7327         if (r)
7328                 return r;
7329
7330         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7331         ring->ring_obj = NULL;
7332         r600_ring_init(rdev, ring, 1024 * 1024);
7333         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7334         if (r)
7335                 return r;
7336
7337         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7338         ring->ring_obj = NULL;
7339         r600_ring_init(rdev, ring, 256 * 1024);
7340
7341         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7342         ring->ring_obj = NULL;
7343         r600_ring_init(rdev, ring, 256 * 1024);
7344
7345         r = radeon_uvd_init(rdev);
7346         if (!r) {
7347                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7348                 ring->ring_obj = NULL;
7349                 r600_ring_init(rdev, ring, 4096);
7350         }
7351
7352         rdev->ih.ring_obj = NULL;
7353         r600_ih_ring_init(rdev, 64 * 1024);
7354
7355         r = r600_pcie_gart_init(rdev);
7356         if (r)
7357                 return r;
7358
7359         rdev->accel_working = true;
7360         r = cik_startup(rdev);
7361         if (r) {
7362                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7363                 cik_cp_fini(rdev);
7364                 cik_sdma_fini(rdev);
7365                 cik_irq_fini(rdev);
7366                 sumo_rlc_fini(rdev);
7367                 cik_mec_fini(rdev);
7368                 radeon_wb_fini(rdev);
7369                 radeon_ib_pool_fini(rdev);
7370                 radeon_vm_manager_fini(rdev);
7371                 radeon_irq_kms_fini(rdev);
7372                 cik_pcie_gart_fini(rdev);
7373                 rdev->accel_working = false;
7374         }
7375
7376         /* Don't start up if the MC ucode is missing.
7377          * The default clocks and voltages before the MC ucode
7378          * is loaded are not suffient for advanced operations.
7379          */
7380         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7381                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7382                 return -EINVAL;
7383         }
7384
7385         return 0;
7386 }
7387
7388 /**
7389  * cik_fini - asic specific driver and hw fini
7390  *
7391  * @rdev: radeon_device pointer
7392  *
7393  * Tear down the asic specific driver variables and program the hw
7394  * to an idle state (CIK).
7395  * Called at driver unload.
7396  */
7397 void cik_fini(struct radeon_device *rdev)
7398 {
7399         cik_cp_fini(rdev);
7400         cik_sdma_fini(rdev);
7401         cik_fini_pg(rdev);
7402         cik_fini_cg(rdev);
7403         cik_irq_fini(rdev);
7404         sumo_rlc_fini(rdev);
7405         cik_mec_fini(rdev);
7406         radeon_wb_fini(rdev);
7407         radeon_vm_manager_fini(rdev);
7408         radeon_ib_pool_fini(rdev);
7409         radeon_irq_kms_fini(rdev);
7410         uvd_v1_0_fini(rdev);
7411         radeon_uvd_fini(rdev);
7412         cik_pcie_gart_fini(rdev);
7413         r600_vram_scratch_fini(rdev);
7414         radeon_gem_fini(rdev);
7415         radeon_fence_driver_fini(rdev);
7416         radeon_bo_fini(rdev);
7417         radeon_atombios_fini(rdev);
7418         kfree(rdev->bios);
7419         rdev->bios = NULL;
7420 }
7421
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
        /* tmp selects the LB partition config, buffer_alloc the DMIF buffers */
        u32 tmp, buffer_alloc, i;
        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controllers.
         * There are 3 partitions per LB. Select the number of partitions
         * to enable based on the display width.  For display widths larger
         * than 4096, you need use to use 2 display controllers and combine
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
                if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
        } else {
                /* crtc disabled: minimal config, no DMIF buffers */
                tmp = 1;
                buffer_alloc = 0;
        }

        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
        /* poll (bounded by usec_timeout) until the hw ack's the allocation */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
                        break;
                udelay(1);
        }

        /* map the partition config back to the LB size in pixels */
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
                default:
                        return 4096 * 2;
                case 1:
                        return 1920 * 2;
                case 2:
                        return 2560 * 2;
                }
        }

        /* controller not enabled, so no lb used */
        return 0;
}
7496
7497 /**
7498  * cik_get_number_of_dram_channels - get the number of dram channels
7499  *
7500  * @rdev: radeon_device pointer
7501  *
7502  * Look up the number of video ram channels (CIK).
7503  * Used for display watermark bandwidth calculations
7504  * Returns the number of dram channels
7505  */
7506 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7507 {
7508         u32 tmp = RREG32(MC_SHARED_CHMAP);
7509
7510         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7511         case 0:
7512         default:
7513                 return 1;
7514         case 1:
7515                 return 2;
7516         case 2:
7517                 return 4;
7518         case 3:
7519                 return 8;
7520         case 4:
7521                 return 3;
7522         case 5:
7523                 return 6;
7524         case 6:
7525                 return 10;
7526         case 7:
7527                 return 12;
7528         case 8:
7529                 return 16;
7530         }
7531 }
7532
/* parameters describing one display head for DCE8 watermark calculations */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
7548
7549 /**
7550  * dce8_dram_bandwidth - get the dram bandwidth
7551  *
7552  * @wm: watermark calculation data
7553  *
7554  * Calculate the raw dram bandwidth (CIK).
7555  * Used for display watermark bandwidth calculations
7556  * Returns the dram bandwidth in MBytes/s
7557  */
7558 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7559 {
7560         /* Calculate raw DRAM Bandwidth */
7561         fixed20_12 dram_efficiency; /* 0.7 */
7562         fixed20_12 yclk, dram_channels, bandwidth;
7563         fixed20_12 a;
7564
7565         a.full = dfixed_const(1000);
7566         yclk.full = dfixed_const(wm->yclk);
7567         yclk.full = dfixed_div(yclk, a);
7568         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7569         a.full = dfixed_const(10);
7570         dram_efficiency.full = dfixed_const(7);
7571         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7572         bandwidth.full = dfixed_mul(dram_channels, yclk);
7573         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7574
7575         return dfixed_trunc(bandwidth);
7576 }
7577
7578 /**
7579  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7580  *
7581  * @wm: watermark calculation data
7582  *
7583  * Calculate the dram bandwidth used for display (CIK).
7584  * Used for display watermark bandwidth calculations
7585  * Returns the dram bandwidth for display in MBytes/s
7586  */
7587 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7588 {
7589         /* Calculate DRAM Bandwidth and the part allocated to display. */
7590         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7591         fixed20_12 yclk, dram_channels, bandwidth;
7592         fixed20_12 a;
7593
7594         a.full = dfixed_const(1000);
7595         yclk.full = dfixed_const(wm->yclk);
7596         yclk.full = dfixed_div(yclk, a);
7597         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7598         a.full = dfixed_const(10);
7599         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7600         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7601         bandwidth.full = dfixed_mul(dram_channels, yclk);
7602         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7603
7604         return dfixed_trunc(bandwidth);
7605 }
7606
7607 /**
7608  * dce8_data_return_bandwidth - get the data return bandwidth
7609  *
7610  * @wm: watermark calculation data
7611  *
7612  * Calculate the data return bandwidth used for display (CIK).
7613  * Used for display watermark bandwidth calculations
7614  * Returns the data return bandwidth in MBytes/s
7615  */
7616 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7617 {
7618         /* Calculate the display Data return Bandwidth */
7619         fixed20_12 return_efficiency; /* 0.8 */
7620         fixed20_12 sclk, bandwidth;
7621         fixed20_12 a;
7622
7623         a.full = dfixed_const(1000);
7624         sclk.full = dfixed_const(wm->sclk);
7625         sclk.full = dfixed_div(sclk, a);
7626         a.full = dfixed_const(10);
7627         return_efficiency.full = dfixed_const(8);
7628         return_efficiency.full = dfixed_div(return_efficiency, a);
7629         a.full = dfixed_const(32);
7630         bandwidth.full = dfixed_mul(a, sclk);
7631         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7632
7633         return dfixed_trunc(bandwidth);
7634 }
7635
7636 /**
7637  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7638  *
7639  * @wm: watermark calculation data
7640  *
7641  * Calculate the dmif bandwidth used for display (CIK).
7642  * Used for display watermark bandwidth calculations
7643  * Returns the dmif bandwidth in MBytes/s
7644  */
7645 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7646 {
7647         /* Calculate the DMIF Request Bandwidth */
7648         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7649         fixed20_12 disp_clk, bandwidth;
7650         fixed20_12 a, b;
7651
7652         a.full = dfixed_const(1000);
7653         disp_clk.full = dfixed_const(wm->disp_clk);
7654         disp_clk.full = dfixed_div(disp_clk, a);
7655         a.full = dfixed_const(32);
7656         b.full = dfixed_mul(a, disp_clk);
7657
7658         a.full = dfixed_const(10);
7659         disp_clk_request_efficiency.full = dfixed_const(8);
7660         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7661
7662         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7663
7664         return dfixed_trunc(bandwidth);
7665 }
7666
7667 /**
7668  * dce8_available_bandwidth - get the min available bandwidth
7669  *
7670  * @wm: watermark calculation data
7671  *
7672  * Calculate the min available bandwidth used for display (CIK).
7673  * Used for display watermark bandwidth calculations
7674  * Returns the min available bandwidth in MBytes/s
7675  */
7676 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7677 {
7678         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7679         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7680         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7681         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7682
7683         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7684 }
7685
7686 /**
7687  * dce8_average_bandwidth - get the average available bandwidth
7688  *
7689  * @wm: watermark calculation data
7690  *
7691  * Calculate the average available bandwidth used for display (CIK).
7692  * Used for display watermark bandwidth calculations
7693  * Returns the average available bandwidth in MBytes/s
7694  */
7695 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7696 {
7697         /* Calculate the display mode Average Bandwidth
7698          * DisplayMode should contain the source and destination dimensions,
7699          * timing, etc.
7700          */
7701         fixed20_12 bpp;
7702         fixed20_12 line_time;
7703         fixed20_12 src_width;
7704         fixed20_12 bandwidth;
7705         fixed20_12 a;
7706
7707         a.full = dfixed_const(1000);
7708         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7709         line_time.full = dfixed_div(line_time, a);
7710         bpp.full = dfixed_const(wm->bytes_per_pixel);
7711         src_width.full = dfixed_const(wm->src_width);
7712         bandwidth.full = dfixed_mul(src_width, bpp);
7713         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7714         bandwidth.full = dfixed_div(bandwidth, line_time);
7715
7716         return dfixed_trunc(bandwidth);
7717 }
7718
7719 /**
7720  * dce8_latency_watermark - get the latency watermark
7721  *
7722  * @wm: watermark calculation data
7723  *
7724  * Calculate the latency watermark (CIK).
7725  * Used for display watermark bandwidth calculations
7726  * Returns the latency watermark in ns
7727  */
7728 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7729 {
7730         /* First calculate the latency in ns */
7731         u32 mc_latency = 2000; /* 2000 ns. */
7732         u32 available_bandwidth = dce8_available_bandwidth(wm);
7733         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7734         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7735         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7736         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7737                 (wm->num_heads * cursor_line_pair_return_time);
7738         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7739         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7740         u32 tmp, dmif_size = 12288;
7741         fixed20_12 a, b, c;
7742
7743         if (wm->num_heads == 0)
7744                 return 0;
7745
7746         a.full = dfixed_const(2);
7747         b.full = dfixed_const(1);
7748         if ((wm->vsc.full > a.full) ||
7749             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7750             (wm->vtaps >= 5) ||
7751             ((wm->vsc.full >= a.full) && wm->interlaced))
7752                 max_src_lines_per_dst_line = 4;
7753         else
7754                 max_src_lines_per_dst_line = 2;
7755
7756         a.full = dfixed_const(available_bandwidth);
7757         b.full = dfixed_const(wm->num_heads);
7758         a.full = dfixed_div(a, b);
7759
7760         b.full = dfixed_const(mc_latency + 512);
7761         c.full = dfixed_const(wm->disp_clk);
7762         b.full = dfixed_div(b, c);
7763
7764         c.full = dfixed_const(dmif_size);
7765         b.full = dfixed_div(c, b);
7766
7767         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7768
7769         b.full = dfixed_const(1000);
7770         c.full = dfixed_const(wm->disp_clk);
7771         b.full = dfixed_div(c, b);
7772         c.full = dfixed_const(wm->bytes_per_pixel);
7773         b.full = dfixed_mul(b, c);
7774
7775         lb_fill_bw = min(tmp, dfixed_trunc(b));
7776
7777         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7778         b.full = dfixed_const(1000);
7779         c.full = dfixed_const(lb_fill_bw);
7780         b.full = dfixed_div(c, b);
7781         a.full = dfixed_div(a, b);
7782         line_fill_time = dfixed_trunc(a);
7783
7784         if (line_fill_time < wm->active_time)
7785                 return latency;
7786         else
7787                 return latency + (line_fill_time - wm->active_time);
7788
7789 }
7790
7791 /**
7792  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7793  * average and available dram bandwidth
7794  *
7795  * @wm: watermark calculation data
7796  *
7797  * Check if the display average bandwidth fits in the display
7798  * dram bandwidth (CIK).
7799  * Used for display watermark bandwidth calculations
7800  * Returns true if the display fits, false if not.
7801  */
7802 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7803 {
7804         if (dce8_average_bandwidth(wm) <=
7805             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7806                 return true;
7807         else
7808                 return false;
7809 }
7810
7811 /**
7812  * dce8_average_bandwidth_vs_available_bandwidth - check
7813  * average and available bandwidth
7814  *
7815  * @wm: watermark calculation data
7816  *
7817  * Check if the display average bandwidth fits in the display
7818  * available bandwidth (CIK).
7819  * Used for display watermark bandwidth calculations
7820  * Returns true if the display fits, false if not.
7821  */
7822 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7823 {
7824         if (dce8_average_bandwidth(wm) <=
7825             (dce8_available_bandwidth(wm) / wm->num_heads))
7826                 return true;
7827         else
7828                 return false;
7829 }
7830
7831 /**
7832  * dce8_check_latency_hiding - check latency hiding
7833  *
7834  * @wm: watermark calculation data
7835  *
7836  * Check latency hiding (CIK).
7837  * Used for display watermark bandwidth calculations
7838  * Returns true if the display fits, false if not.
7839  */
7840 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7841 {
7842         u32 lb_partitions = wm->lb_size / wm->src_width;
7843         u32 line_time = wm->active_time + wm->blank_time;
7844         u32 latency_tolerant_lines;
7845         u32 latency_hiding;
7846         fixed20_12 a;
7847
7848         a.full = dfixed_const(1);
7849         if (wm->vsc.full > a.full)
7850                 latency_tolerant_lines = 1;
7851         else {
7852                 if (lb_partitions <= (wm->vtaps + 1))
7853                         latency_tolerant_lines = 1;
7854                 else
7855                         latency_tolerant_lines = 2;
7856         }
7857
7858         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7859
7860         if (dce8_latency_watermark(wm) <= latency_hiding)
7861                 return true;
7862         else
7863                 return false;
7864 }
7865
7866 /**
7867  * dce8_program_watermarks - program display watermarks
7868  *
7869  * @rdev: radeon_device pointer
7870  * @radeon_crtc: the selected display controller
7871  * @lb_size: line buffer size
7872  * @num_heads: number of display controllers in use
7873  *
7874  * Calculate and program the display watermarks for the
7875  * selected display controller (CIK).
7876  */
7877 static void dce8_program_watermarks(struct radeon_device *rdev,
7878                                     struct radeon_crtc *radeon_crtc,
7879                                     u32 lb_size, u32 num_heads)
7880 {
7881         struct drm_display_mode *mode = &radeon_crtc->base.mode;
7882         struct dce8_wm_params wm_low, wm_high;
7883         u32 pixel_period;
7884         u32 line_time = 0;
7885         u32 latency_watermark_a = 0, latency_watermark_b = 0;
7886         u32 tmp, wm_mask;
7887
7888         if (radeon_crtc->base.enabled && num_heads && mode) {
7889                 pixel_period = 1000000 / (u32)mode->clock;
7890                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7891
7892                 /* watermark for high clocks */
7893                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7894                     rdev->pm.dpm_enabled) {
7895                         wm_high.yclk =
7896                                 radeon_dpm_get_mclk(rdev, false) * 10;
7897                         wm_high.sclk =
7898                                 radeon_dpm_get_sclk(rdev, false) * 10;
7899                 } else {
7900                         wm_high.yclk = rdev->pm.current_mclk * 10;
7901                         wm_high.sclk = rdev->pm.current_sclk * 10;
7902                 }
7903
7904                 wm_high.disp_clk = mode->clock;
7905                 wm_high.src_width = mode->crtc_hdisplay;
7906                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7907                 wm_high.blank_time = line_time - wm_high.active_time;
7908                 wm_high.interlaced = false;
7909                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7910                         wm_high.interlaced = true;
7911                 wm_high.vsc = radeon_crtc->vsc;
7912                 wm_high.vtaps = 1;
7913                 if (radeon_crtc->rmx_type != RMX_OFF)
7914                         wm_high.vtaps = 2;
7915                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7916                 wm_high.lb_size = lb_size;
7917                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7918                 wm_high.num_heads = num_heads;
7919
7920                 /* set for high clocks */
7921                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7922
7923                 /* possibly force display priority to high */
7924                 /* should really do this at mode validation time... */
7925                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7926                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7927                     !dce8_check_latency_hiding(&wm_high) ||
7928                     (rdev->disp_priority == 2)) {
7929                         DRM_DEBUG_KMS("force priority to high\n");
7930                 }
7931
7932                 /* watermark for low clocks */
7933                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7934                     rdev->pm.dpm_enabled) {
7935                         wm_low.yclk =
7936                                 radeon_dpm_get_mclk(rdev, true) * 10;
7937                         wm_low.sclk =
7938                                 radeon_dpm_get_sclk(rdev, true) * 10;
7939                 } else {
7940                         wm_low.yclk = rdev->pm.current_mclk * 10;
7941                         wm_low.sclk = rdev->pm.current_sclk * 10;
7942                 }
7943
7944                 wm_low.disp_clk = mode->clock;
7945                 wm_low.src_width = mode->crtc_hdisplay;
7946                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7947                 wm_low.blank_time = line_time - wm_low.active_time;
7948                 wm_low.interlaced = false;
7949                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7950                         wm_low.interlaced = true;
7951                 wm_low.vsc = radeon_crtc->vsc;
7952                 wm_low.vtaps = 1;
7953                 if (radeon_crtc->rmx_type != RMX_OFF)
7954                         wm_low.vtaps = 2;
7955                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7956                 wm_low.lb_size = lb_size;
7957                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7958                 wm_low.num_heads = num_heads;
7959
7960                 /* set for low clocks */
7961                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7962
7963                 /* possibly force display priority to high */
7964                 /* should really do this at mode validation time... */
7965                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7966                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7967                     !dce8_check_latency_hiding(&wm_low) ||
7968                     (rdev->disp_priority == 2)) {
7969                         DRM_DEBUG_KMS("force priority to high\n");
7970                 }
7971         }
7972
7973         /* select wm A */
7974         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7975         tmp = wm_mask;
7976         tmp &= ~LATENCY_WATERMARK_MASK(3);
7977         tmp |= LATENCY_WATERMARK_MASK(1);
7978         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7979         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7980                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7981                 LATENCY_HIGH_WATERMARK(line_time)));
7982         /* select wm B */
7983         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7984         tmp &= ~LATENCY_WATERMARK_MASK(3);
7985         tmp |= LATENCY_WATERMARK_MASK(2);
7986         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7987         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7988                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7989                 LATENCY_HIGH_WATERMARK(line_time)));
7990         /* restore original selection */
7991         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7992
7993         /* save values for DPM */
7994         radeon_crtc->line_time = line_time;
7995         radeon_crtc->wm_high = latency_watermark_a;
7996         radeon_crtc->wm_low = latency_watermark_b;
7997 }
7998
7999 /**
8000  * dce8_bandwidth_update - program display watermarks
8001  *
8002  * @rdev: radeon_device pointer
8003  *
8004  * Calculate and program the display watermarks and line
8005  * buffer allocation (CIK).
8006  */
8007 void dce8_bandwidth_update(struct radeon_device *rdev)
8008 {
8009         struct drm_display_mode *mode = NULL;
8010         u32 num_heads = 0, lb_size;
8011         int i;
8012
8013         radeon_update_display_priority(rdev);
8014
8015         for (i = 0; i < rdev->num_crtc; i++) {
8016                 if (rdev->mode_info.crtcs[i]->base.enabled)
8017                         num_heads++;
8018         }
8019         for (i = 0; i < rdev->num_crtc; i++) {
8020                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8021                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8022                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8023         }
8024 }
8025
8026 /**
8027  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8028  *
8029  * @rdev: radeon_device pointer
8030  *
8031  * Fetches a GPU clock counter snapshot (SI).
8032  * Returns the 64 bit clock counter snapshot.
8033  */
8034 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8035 {
8036         uint64_t clock;
8037
8038         mutex_lock(&rdev->gpu_clock_mutex);
8039         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8040         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8041                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8042         mutex_unlock(&rdev->gpu_clock_mutex);
8043         return clock;
8044 }
8045
8046 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8047                               u32 cntl_reg, u32 status_reg)
8048 {
8049         int r, i;
8050         struct atom_clock_dividers dividers;
8051         uint32_t tmp;
8052
8053         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8054                                            clock, false, &dividers);
8055         if (r)
8056                 return r;
8057
8058         tmp = RREG32_SMC(cntl_reg);
8059         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8060         tmp |= dividers.post_divider;
8061         WREG32_SMC(cntl_reg, tmp);
8062
8063         for (i = 0; i < 100; i++) {
8064                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8065                         break;
8066                 mdelay(10);
8067         }
8068         if (i == 100)
8069                 return -ETIMEDOUT;
8070
8071         return 0;
8072 }
8073
8074 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8075 {
8076         int r = 0;
8077
8078         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8079         if (r)
8080                 return r;
8081
8082         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8083         return r;
8084 }
8085
/**
 * cik_pcie_gen3_enable - retrain the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the platform (root port) and the GPU support a faster link
 * rate than is currently trained, request a speed change to the highest
 * mutually supported rate, including the gen3 equalization retry
 * sequence when targeting 8.0 GT/s.  Disabled with radeon.pcie_gen2=0.
 * Register write ordering here follows the hardware bring-up sequence;
 * do not reorder.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter radeon.pcie_gen2=0 disables speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* integrated parts have no external PCIe link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which link speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing above gen1 supported - nothing to do */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 1 = gen2, 2 = gen3 (per checks below) */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state of both ends, then force it on
			 * for the duration of the retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the detected maximum if it
			 * trained narrower and renegotiation is supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot LNKCTL/LNKCTL2 of both ends so the
				 * fields below can be restored after the redo */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then trigger equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bit 4 and bits 9:11 from the
				 * saved values */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce for the next attempt */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8242
/**
 * cik_program_aspm - configure PCIe ASPM (L0s/L1) power savings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link controller for Active State Power Management:
 * L0s/L1 inactivity timers, PLL power-down while in L1, dynamic lane
 * power states and (when the root port advertises clock PM) internal
 * clock switching.  Disabled with radeon.aspm=0.  Most writes follow
 * the read-modify-write-only-if-changed pattern to avoid redundant
 * PCIE port register traffic.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* NOTE(review): these knobs are hard-wired false here — presumably
	 * hooks for per-board quirks; confirm before relying on them */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter radeon.aspm=0 disables all of this */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up the L0s/L1 inactivity timers; the register is only
	 * written here when L1 stays disabled (else branch below) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in the OFF and
			 * TXS2 states, on both PB0 and PB1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ-based power management is only usable
				 * if the root port advertises clock PM */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor / misc clocks to
				 * alternate sources so the refclk can gate */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the LC_CNTL value computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			/* on a reversed link, drop the L0s inactivity timer */
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}