/*
 * Source: karo-tx-linux.git (git.karo-electronics.de),
 * blob: drivers/gpu/drm/radeon/si.c
 * Branch state: merge of remote-tracking branch 'hid/for-next'.
 */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 extern void si_dma_vm_set_page(struct radeon_device *rdev,
82                                struct radeon_ib *ib,
83                                uint64_t pe,
84                                uint64_t addr, unsigned count,
85                                uint32_t incr, uint32_t flags);
86 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
87                                          bool enable);
88 static void si_fini_pg(struct radeon_device *rdev);
89 static void si_fini_cg(struct radeon_device *rdev);
90 static void si_rlc_stop(struct radeon_device *rdev);
91
92 static const u32 verde_rlc_save_restore_register_list[] =
93 {
94         (0x8000 << 16) | (0x98f4 >> 2),
95         0x00000000,
96         (0x8040 << 16) | (0x98f4 >> 2),
97         0x00000000,
98         (0x8000 << 16) | (0xe80 >> 2),
99         0x00000000,
100         (0x8040 << 16) | (0xe80 >> 2),
101         0x00000000,
102         (0x8000 << 16) | (0x89bc >> 2),
103         0x00000000,
104         (0x8040 << 16) | (0x89bc >> 2),
105         0x00000000,
106         (0x8000 << 16) | (0x8c1c >> 2),
107         0x00000000,
108         (0x8040 << 16) | (0x8c1c >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x98f0 >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0xe7c >> 2),
113         0x00000000,
114         (0x8000 << 16) | (0x9148 >> 2),
115         0x00000000,
116         (0x8040 << 16) | (0x9148 >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0x9150 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x897c >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x8d8c >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0xac54 >> 2),
125         0X00000000,
126         0x3,
127         (0x9c00 << 16) | (0x98f8 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9910 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9914 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x9918 >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x991c >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9920 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9924 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x9928 >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x992c >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9930 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9934 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x9938 >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x993c >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9940 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9944 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x9948 >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x994c >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9950 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9954 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x9958 >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x995c >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9960 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9964 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x9968 >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x996c >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9970 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9974 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9978 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x997c >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x9980 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9984 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x9988 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x998c >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x8c00 >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x8c14 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x8c04 >> 2),
198         0x00000000,
199         (0x9c00 << 16) | (0x8c08 >> 2),
200         0x00000000,
201         (0x8000 << 16) | (0x9b7c >> 2),
202         0x00000000,
203         (0x8040 << 16) | (0x9b7c >> 2),
204         0x00000000,
205         (0x8000 << 16) | (0xe84 >> 2),
206         0x00000000,
207         (0x8040 << 16) | (0xe84 >> 2),
208         0x00000000,
209         (0x8000 << 16) | (0x89c0 >> 2),
210         0x00000000,
211         (0x8040 << 16) | (0x89c0 >> 2),
212         0x00000000,
213         (0x8000 << 16) | (0x914c >> 2),
214         0x00000000,
215         (0x8040 << 16) | (0x914c >> 2),
216         0x00000000,
217         (0x8000 << 16) | (0x8c20 >> 2),
218         0x00000000,
219         (0x8040 << 16) | (0x8c20 >> 2),
220         0x00000000,
221         (0x8000 << 16) | (0x9354 >> 2),
222         0x00000000,
223         (0x8040 << 16) | (0x9354 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x9060 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9364 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x9100 >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x913c >> 2),
232         0x00000000,
233         (0x8000 << 16) | (0x90e0 >> 2),
234         0x00000000,
235         (0x8000 << 16) | (0x90e4 >> 2),
236         0x00000000,
237         (0x8000 << 16) | (0x90e8 >> 2),
238         0x00000000,
239         (0x8040 << 16) | (0x90e0 >> 2),
240         0x00000000,
241         (0x8040 << 16) | (0x90e4 >> 2),
242         0x00000000,
243         (0x8040 << 16) | (0x90e8 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8bcc >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8b24 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x88c4 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x8e50 >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x8c0c >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x8e58 >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x8e5c >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0x9508 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0x950c >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0x9494 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0xac0c >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0xac10 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0xac14 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0xae00 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0xac08 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x88d4 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x88c8 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x88cc >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x89b0 >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x8b10 >> 2),
284         0x00000000,
285         (0x9c00 << 16) | (0x8a14 >> 2),
286         0x00000000,
287         (0x9c00 << 16) | (0x9830 >> 2),
288         0x00000000,
289         (0x9c00 << 16) | (0x9834 >> 2),
290         0x00000000,
291         (0x9c00 << 16) | (0x9838 >> 2),
292         0x00000000,
293         (0x9c00 << 16) | (0x9a10 >> 2),
294         0x00000000,
295         (0x8000 << 16) | (0x9870 >> 2),
296         0x00000000,
297         (0x8000 << 16) | (0x9874 >> 2),
298         0x00000000,
299         (0x8001 << 16) | (0x9870 >> 2),
300         0x00000000,
301         (0x8001 << 16) | (0x9874 >> 2),
302         0x00000000,
303         (0x8040 << 16) | (0x9870 >> 2),
304         0x00000000,
305         (0x8040 << 16) | (0x9874 >> 2),
306         0x00000000,
307         (0x8041 << 16) | (0x9870 >> 2),
308         0x00000000,
309         (0x8041 << 16) | (0x9874 >> 2),
310         0x00000000,
311         0x00000000
312 };
313
314 static const u32 tahiti_golden_rlc_registers[] =
315 {
316         0xc424, 0xffffffff, 0x00601005,
317         0xc47c, 0xffffffff, 0x10104040,
318         0xc488, 0xffffffff, 0x0100000a,
319         0xc314, 0xffffffff, 0x00000800,
320         0xc30c, 0xffffffff, 0x800000f4,
321         0xf4a8, 0xffffffff, 0x00000000
322 };
323
324 static const u32 tahiti_golden_registers[] =
325 {
326         0x9a10, 0x00010000, 0x00018208,
327         0x9830, 0xffffffff, 0x00000000,
328         0x9834, 0xf00fffff, 0x00000400,
329         0x9838, 0x0002021c, 0x00020200,
330         0xc78, 0x00000080, 0x00000000,
331         0xd030, 0x000300c0, 0x00800040,
332         0xd830, 0x000300c0, 0x00800040,
333         0x5bb0, 0x000000f0, 0x00000070,
334         0x5bc0, 0x00200000, 0x50100000,
335         0x7030, 0x31000311, 0x00000011,
336         0x277c, 0x00000003, 0x000007ff,
337         0x240c, 0x000007ff, 0x00000000,
338         0x8a14, 0xf000001f, 0x00000007,
339         0x8b24, 0xffffffff, 0x00ffffff,
340         0x8b10, 0x0000ff0f, 0x00000000,
341         0x28a4c, 0x07ffffff, 0x4e000000,
342         0x28350, 0x3f3f3fff, 0x2a00126a,
343         0x30, 0x000000ff, 0x0040,
344         0x34, 0x00000040, 0x00004040,
345         0x9100, 0x07ffffff, 0x03000000,
346         0x8e88, 0x01ff1f3f, 0x00000000,
347         0x8e84, 0x01ff1f3f, 0x00000000,
348         0x9060, 0x0000007f, 0x00000020,
349         0x9508, 0x00010000, 0x00010000,
350         0xac14, 0x00000200, 0x000002fb,
351         0xac10, 0xffffffff, 0x0000543b,
352         0xac0c, 0xffffffff, 0xa9210876,
353         0x88d0, 0xffffffff, 0x000fff40,
354         0x88d4, 0x0000001f, 0x00000010,
355         0x1410, 0x20000000, 0x20fffed8,
356         0x15c0, 0x000c0fc0, 0x000c0400
357 };
358
359 static const u32 tahiti_golden_registers2[] =
360 {
361         0xc64, 0x00000001, 0x00000001
362 };
363
364 static const u32 pitcairn_golden_rlc_registers[] =
365 {
366         0xc424, 0xffffffff, 0x00601004,
367         0xc47c, 0xffffffff, 0x10102020,
368         0xc488, 0xffffffff, 0x01000020,
369         0xc314, 0xffffffff, 0x00000800,
370         0xc30c, 0xffffffff, 0x800000a4
371 };
372
373 static const u32 pitcairn_golden_registers[] =
374 {
375         0x9a10, 0x00010000, 0x00018208,
376         0x9830, 0xffffffff, 0x00000000,
377         0x9834, 0xf00fffff, 0x00000400,
378         0x9838, 0x0002021c, 0x00020200,
379         0xc78, 0x00000080, 0x00000000,
380         0xd030, 0x000300c0, 0x00800040,
381         0xd830, 0x000300c0, 0x00800040,
382         0x5bb0, 0x000000f0, 0x00000070,
383         0x5bc0, 0x00200000, 0x50100000,
384         0x7030, 0x31000311, 0x00000011,
385         0x2ae4, 0x00073ffe, 0x000022a2,
386         0x240c, 0x000007ff, 0x00000000,
387         0x8a14, 0xf000001f, 0x00000007,
388         0x8b24, 0xffffffff, 0x00ffffff,
389         0x8b10, 0x0000ff0f, 0x00000000,
390         0x28a4c, 0x07ffffff, 0x4e000000,
391         0x28350, 0x3f3f3fff, 0x2a00126a,
392         0x30, 0x000000ff, 0x0040,
393         0x34, 0x00000040, 0x00004040,
394         0x9100, 0x07ffffff, 0x03000000,
395         0x9060, 0x0000007f, 0x00000020,
396         0x9508, 0x00010000, 0x00010000,
397         0xac14, 0x000003ff, 0x000000f7,
398         0xac10, 0xffffffff, 0x00000000,
399         0xac0c, 0xffffffff, 0x32761054,
400         0x88d4, 0x0000001f, 0x00000010,
401         0x15c0, 0x000c0fc0, 0x000c0400
402 };
403
404 static const u32 verde_golden_rlc_registers[] =
405 {
406         0xc424, 0xffffffff, 0x033f1005,
407         0xc47c, 0xffffffff, 0x10808020,
408         0xc488, 0xffffffff, 0x00800008,
409         0xc314, 0xffffffff, 0x00001000,
410         0xc30c, 0xffffffff, 0x80010014
411 };
412
413 static const u32 verde_golden_registers[] =
414 {
415         0x9a10, 0x00010000, 0x00018208,
416         0x9830, 0xffffffff, 0x00000000,
417         0x9834, 0xf00fffff, 0x00000400,
418         0x9838, 0x0002021c, 0x00020200,
419         0xc78, 0x00000080, 0x00000000,
420         0xd030, 0x000300c0, 0x00800040,
421         0xd030, 0x000300c0, 0x00800040,
422         0xd830, 0x000300c0, 0x00800040,
423         0xd830, 0x000300c0, 0x00800040,
424         0x5bb0, 0x000000f0, 0x00000070,
425         0x5bc0, 0x00200000, 0x50100000,
426         0x7030, 0x31000311, 0x00000011,
427         0x2ae4, 0x00073ffe, 0x000022a2,
428         0x2ae4, 0x00073ffe, 0x000022a2,
429         0x2ae4, 0x00073ffe, 0x000022a2,
430         0x240c, 0x000007ff, 0x00000000,
431         0x240c, 0x000007ff, 0x00000000,
432         0x240c, 0x000007ff, 0x00000000,
433         0x8a14, 0xf000001f, 0x00000007,
434         0x8a14, 0xf000001f, 0x00000007,
435         0x8a14, 0xf000001f, 0x00000007,
436         0x8b24, 0xffffffff, 0x00ffffff,
437         0x8b10, 0x0000ff0f, 0x00000000,
438         0x28a4c, 0x07ffffff, 0x4e000000,
439         0x28350, 0x3f3f3fff, 0x0000124a,
440         0x28350, 0x3f3f3fff, 0x0000124a,
441         0x28350, 0x3f3f3fff, 0x0000124a,
442         0x30, 0x000000ff, 0x0040,
443         0x34, 0x00000040, 0x00004040,
444         0x9100, 0x07ffffff, 0x03000000,
445         0x9100, 0x07ffffff, 0x03000000,
446         0x8e88, 0x01ff1f3f, 0x00000000,
447         0x8e88, 0x01ff1f3f, 0x00000000,
448         0x8e88, 0x01ff1f3f, 0x00000000,
449         0x8e84, 0x01ff1f3f, 0x00000000,
450         0x8e84, 0x01ff1f3f, 0x00000000,
451         0x8e84, 0x01ff1f3f, 0x00000000,
452         0x9060, 0x0000007f, 0x00000020,
453         0x9508, 0x00010000, 0x00010000,
454         0xac14, 0x000003ff, 0x00000003,
455         0xac14, 0x000003ff, 0x00000003,
456         0xac14, 0x000003ff, 0x00000003,
457         0xac10, 0xffffffff, 0x00000000,
458         0xac10, 0xffffffff, 0x00000000,
459         0xac10, 0xffffffff, 0x00000000,
460         0xac0c, 0xffffffff, 0x00001032,
461         0xac0c, 0xffffffff, 0x00001032,
462         0xac0c, 0xffffffff, 0x00001032,
463         0x88d4, 0x0000001f, 0x00000010,
464         0x88d4, 0x0000001f, 0x00000010,
465         0x88d4, 0x0000001f, 0x00000010,
466         0x15c0, 0x000c0fc0, 0x000c0400
467 };
468
469 static const u32 oland_golden_rlc_registers[] =
470 {
471         0xc424, 0xffffffff, 0x00601005,
472         0xc47c, 0xffffffff, 0x10104040,
473         0xc488, 0xffffffff, 0x0100000a,
474         0xc314, 0xffffffff, 0x00000800,
475         0xc30c, 0xffffffff, 0x800000f4
476 };
477
478 static const u32 oland_golden_registers[] =
479 {
480         0x9a10, 0x00010000, 0x00018208,
481         0x9830, 0xffffffff, 0x00000000,
482         0x9834, 0xf00fffff, 0x00000400,
483         0x9838, 0x0002021c, 0x00020200,
484         0xc78, 0x00000080, 0x00000000,
485         0xd030, 0x000300c0, 0x00800040,
486         0xd830, 0x000300c0, 0x00800040,
487         0x5bb0, 0x000000f0, 0x00000070,
488         0x5bc0, 0x00200000, 0x50100000,
489         0x7030, 0x31000311, 0x00000011,
490         0x2ae4, 0x00073ffe, 0x000022a2,
491         0x240c, 0x000007ff, 0x00000000,
492         0x8a14, 0xf000001f, 0x00000007,
493         0x8b24, 0xffffffff, 0x00ffffff,
494         0x8b10, 0x0000ff0f, 0x00000000,
495         0x28a4c, 0x07ffffff, 0x4e000000,
496         0x28350, 0x3f3f3fff, 0x00000082,
497         0x30, 0x000000ff, 0x0040,
498         0x34, 0x00000040, 0x00004040,
499         0x9100, 0x07ffffff, 0x03000000,
500         0x9060, 0x0000007f, 0x00000020,
501         0x9508, 0x00010000, 0x00010000,
502         0xac14, 0x000003ff, 0x000000f3,
503         0xac10, 0xffffffff, 0x00000000,
504         0xac0c, 0xffffffff, 0x00003210,
505         0x88d4, 0x0000001f, 0x00000010,
506         0x15c0, 0x000c0fc0, 0x000c0400
507 };
508
509 static const u32 hainan_golden_registers[] =
510 {
511         0x9a10, 0x00010000, 0x00018208,
512         0x9830, 0xffffffff, 0x00000000,
513         0x9834, 0xf00fffff, 0x00000400,
514         0x9838, 0x0002021c, 0x00020200,
515         0xd0c0, 0xff000fff, 0x00000100,
516         0xd030, 0x000300c0, 0x00800040,
517         0xd8c0, 0xff000fff, 0x00000100,
518         0xd830, 0x000300c0, 0x00800040,
519         0x2ae4, 0x00073ffe, 0x000022a2,
520         0x240c, 0x000007ff, 0x00000000,
521         0x8a14, 0xf000001f, 0x00000007,
522         0x8b24, 0xffffffff, 0x00ffffff,
523         0x8b10, 0x0000ff0f, 0x00000000,
524         0x28a4c, 0x07ffffff, 0x4e000000,
525         0x28350, 0x3f3f3fff, 0x00000000,
526         0x30, 0x000000ff, 0x0040,
527         0x34, 0x00000040, 0x00004040,
528         0x9100, 0x03e00000, 0x03600000,
529         0x9060, 0x0000007f, 0x00000020,
530         0x9508, 0x00010000, 0x00010000,
531         0xac14, 0x000003ff, 0x000000f1,
532         0xac10, 0xffffffff, 0x00000000,
533         0xac0c, 0xffffffff, 0x00003210,
534         0x88d4, 0x0000001f, 0x00000010,
535         0x15c0, 0x000c0fc0, 0x000c0400
536 };
537
538 static const u32 hainan_golden_registers2[] =
539 {
540         0x98f8, 0xffffffff, 0x02010001
541 };
542
543 static const u32 tahiti_mgcg_cgcg_init[] =
544 {
545         0xc400, 0xffffffff, 0xfffffffc,
546         0x802c, 0xffffffff, 0xe0000000,
547         0x9a60, 0xffffffff, 0x00000100,
548         0x92a4, 0xffffffff, 0x00000100,
549         0xc164, 0xffffffff, 0x00000100,
550         0x9774, 0xffffffff, 0x00000100,
551         0x8984, 0xffffffff, 0x06000100,
552         0x8a18, 0xffffffff, 0x00000100,
553         0x92a0, 0xffffffff, 0x00000100,
554         0xc380, 0xffffffff, 0x00000100,
555         0x8b28, 0xffffffff, 0x00000100,
556         0x9144, 0xffffffff, 0x00000100,
557         0x8d88, 0xffffffff, 0x00000100,
558         0x8d8c, 0xffffffff, 0x00000100,
559         0x9030, 0xffffffff, 0x00000100,
560         0x9034, 0xffffffff, 0x00000100,
561         0x9038, 0xffffffff, 0x00000100,
562         0x903c, 0xffffffff, 0x00000100,
563         0xad80, 0xffffffff, 0x00000100,
564         0xac54, 0xffffffff, 0x00000100,
565         0x897c, 0xffffffff, 0x06000100,
566         0x9868, 0xffffffff, 0x00000100,
567         0x9510, 0xffffffff, 0x00000100,
568         0xaf04, 0xffffffff, 0x00000100,
569         0xae04, 0xffffffff, 0x00000100,
570         0x949c, 0xffffffff, 0x00000100,
571         0x802c, 0xffffffff, 0xe0000000,
572         0x9160, 0xffffffff, 0x00010000,
573         0x9164, 0xffffffff, 0x00030002,
574         0x9168, 0xffffffff, 0x00040007,
575         0x916c, 0xffffffff, 0x00060005,
576         0x9170, 0xffffffff, 0x00090008,
577         0x9174, 0xffffffff, 0x00020001,
578         0x9178, 0xffffffff, 0x00040003,
579         0x917c, 0xffffffff, 0x00000007,
580         0x9180, 0xffffffff, 0x00060005,
581         0x9184, 0xffffffff, 0x00090008,
582         0x9188, 0xffffffff, 0x00030002,
583         0x918c, 0xffffffff, 0x00050004,
584         0x9190, 0xffffffff, 0x00000008,
585         0x9194, 0xffffffff, 0x00070006,
586         0x9198, 0xffffffff, 0x000a0009,
587         0x919c, 0xffffffff, 0x00040003,
588         0x91a0, 0xffffffff, 0x00060005,
589         0x91a4, 0xffffffff, 0x00000009,
590         0x91a8, 0xffffffff, 0x00080007,
591         0x91ac, 0xffffffff, 0x000b000a,
592         0x91b0, 0xffffffff, 0x00050004,
593         0x91b4, 0xffffffff, 0x00070006,
594         0x91b8, 0xffffffff, 0x0008000b,
595         0x91bc, 0xffffffff, 0x000a0009,
596         0x91c0, 0xffffffff, 0x000d000c,
597         0x91c4, 0xffffffff, 0x00060005,
598         0x91c8, 0xffffffff, 0x00080007,
599         0x91cc, 0xffffffff, 0x0000000b,
600         0x91d0, 0xffffffff, 0x000a0009,
601         0x91d4, 0xffffffff, 0x000d000c,
602         0x91d8, 0xffffffff, 0x00070006,
603         0x91dc, 0xffffffff, 0x00090008,
604         0x91e0, 0xffffffff, 0x0000000c,
605         0x91e4, 0xffffffff, 0x000b000a,
606         0x91e8, 0xffffffff, 0x000e000d,
607         0x91ec, 0xffffffff, 0x00080007,
608         0x91f0, 0xffffffff, 0x000a0009,
609         0x91f4, 0xffffffff, 0x0000000d,
610         0x91f8, 0xffffffff, 0x000c000b,
611         0x91fc, 0xffffffff, 0x000f000e,
612         0x9200, 0xffffffff, 0x00090008,
613         0x9204, 0xffffffff, 0x000b000a,
614         0x9208, 0xffffffff, 0x000c000f,
615         0x920c, 0xffffffff, 0x000e000d,
616         0x9210, 0xffffffff, 0x00110010,
617         0x9214, 0xffffffff, 0x000a0009,
618         0x9218, 0xffffffff, 0x000c000b,
619         0x921c, 0xffffffff, 0x0000000f,
620         0x9220, 0xffffffff, 0x000e000d,
621         0x9224, 0xffffffff, 0x00110010,
622         0x9228, 0xffffffff, 0x000b000a,
623         0x922c, 0xffffffff, 0x000d000c,
624         0x9230, 0xffffffff, 0x00000010,
625         0x9234, 0xffffffff, 0x000f000e,
626         0x9238, 0xffffffff, 0x00120011,
627         0x923c, 0xffffffff, 0x000c000b,
628         0x9240, 0xffffffff, 0x000e000d,
629         0x9244, 0xffffffff, 0x00000011,
630         0x9248, 0xffffffff, 0x0010000f,
631         0x924c, 0xffffffff, 0x00130012,
632         0x9250, 0xffffffff, 0x000d000c,
633         0x9254, 0xffffffff, 0x000f000e,
634         0x9258, 0xffffffff, 0x00100013,
635         0x925c, 0xffffffff, 0x00120011,
636         0x9260, 0xffffffff, 0x00150014,
637         0x9264, 0xffffffff, 0x000e000d,
638         0x9268, 0xffffffff, 0x0010000f,
639         0x926c, 0xffffffff, 0x00000013,
640         0x9270, 0xffffffff, 0x00120011,
641         0x9274, 0xffffffff, 0x00150014,
642         0x9278, 0xffffffff, 0x000f000e,
643         0x927c, 0xffffffff, 0x00110010,
644         0x9280, 0xffffffff, 0x00000014,
645         0x9284, 0xffffffff, 0x00130012,
646         0x9288, 0xffffffff, 0x00160015,
647         0x928c, 0xffffffff, 0x0010000f,
648         0x9290, 0xffffffff, 0x00120011,
649         0x9294, 0xffffffff, 0x00000015,
650         0x9298, 0xffffffff, 0x00140013,
651         0x929c, 0xffffffff, 0x00170016,
652         0x9150, 0xffffffff, 0x96940200,
653         0x8708, 0xffffffff, 0x00900100,
654         0xc478, 0xffffffff, 0x00000080,
655         0xc404, 0xffffffff, 0x0020003f,
656         0x30, 0xffffffff, 0x0000001c,
657         0x34, 0x000f0000, 0x000f0000,
658         0x160c, 0xffffffff, 0x00000100,
659         0x1024, 0xffffffff, 0x00000100,
660         0x102c, 0x00000101, 0x00000000,
661         0x20a8, 0xffffffff, 0x00000104,
662         0x264c, 0x000c0000, 0x000c0000,
663         0x2648, 0x000c0000, 0x000c0000,
664         0x55e4, 0xff000fff, 0x00000100,
665         0x55e8, 0x00000001, 0x00000001,
666         0x2f50, 0x00000001, 0x00000001,
667         0x30cc, 0xc0000fff, 0x00000104,
668         0xc1e4, 0x00000001, 0x00000001,
669         0xd0c0, 0xfffffff0, 0x00000100,
670         0xd8c0, 0xfffffff0, 0x00000100
671 };
672
673 static const u32 pitcairn_mgcg_cgcg_init[] =
674 {
675         0xc400, 0xffffffff, 0xfffffffc,
676         0x802c, 0xffffffff, 0xe0000000,
677         0x9a60, 0xffffffff, 0x00000100,
678         0x92a4, 0xffffffff, 0x00000100,
679         0xc164, 0xffffffff, 0x00000100,
680         0x9774, 0xffffffff, 0x00000100,
681         0x8984, 0xffffffff, 0x06000100,
682         0x8a18, 0xffffffff, 0x00000100,
683         0x92a0, 0xffffffff, 0x00000100,
684         0xc380, 0xffffffff, 0x00000100,
685         0x8b28, 0xffffffff, 0x00000100,
686         0x9144, 0xffffffff, 0x00000100,
687         0x8d88, 0xffffffff, 0x00000100,
688         0x8d8c, 0xffffffff, 0x00000100,
689         0x9030, 0xffffffff, 0x00000100,
690         0x9034, 0xffffffff, 0x00000100,
691         0x9038, 0xffffffff, 0x00000100,
692         0x903c, 0xffffffff, 0x00000100,
693         0xad80, 0xffffffff, 0x00000100,
694         0xac54, 0xffffffff, 0x00000100,
695         0x897c, 0xffffffff, 0x06000100,
696         0x9868, 0xffffffff, 0x00000100,
697         0x9510, 0xffffffff, 0x00000100,
698         0xaf04, 0xffffffff, 0x00000100,
699         0xae04, 0xffffffff, 0x00000100,
700         0x949c, 0xffffffff, 0x00000100,
701         0x802c, 0xffffffff, 0xe0000000,
702         0x9160, 0xffffffff, 0x00010000,
703         0x9164, 0xffffffff, 0x00030002,
704         0x9168, 0xffffffff, 0x00040007,
705         0x916c, 0xffffffff, 0x00060005,
706         0x9170, 0xffffffff, 0x00090008,
707         0x9174, 0xffffffff, 0x00020001,
708         0x9178, 0xffffffff, 0x00040003,
709         0x917c, 0xffffffff, 0x00000007,
710         0x9180, 0xffffffff, 0x00060005,
711         0x9184, 0xffffffff, 0x00090008,
712         0x9188, 0xffffffff, 0x00030002,
713         0x918c, 0xffffffff, 0x00050004,
714         0x9190, 0xffffffff, 0x00000008,
715         0x9194, 0xffffffff, 0x00070006,
716         0x9198, 0xffffffff, 0x000a0009,
717         0x919c, 0xffffffff, 0x00040003,
718         0x91a0, 0xffffffff, 0x00060005,
719         0x91a4, 0xffffffff, 0x00000009,
720         0x91a8, 0xffffffff, 0x00080007,
721         0x91ac, 0xffffffff, 0x000b000a,
722         0x91b0, 0xffffffff, 0x00050004,
723         0x91b4, 0xffffffff, 0x00070006,
724         0x91b8, 0xffffffff, 0x0008000b,
725         0x91bc, 0xffffffff, 0x000a0009,
726         0x91c0, 0xffffffff, 0x000d000c,
727         0x9200, 0xffffffff, 0x00090008,
728         0x9204, 0xffffffff, 0x000b000a,
729         0x9208, 0xffffffff, 0x000c000f,
730         0x920c, 0xffffffff, 0x000e000d,
731         0x9210, 0xffffffff, 0x00110010,
732         0x9214, 0xffffffff, 0x000a0009,
733         0x9218, 0xffffffff, 0x000c000b,
734         0x921c, 0xffffffff, 0x0000000f,
735         0x9220, 0xffffffff, 0x000e000d,
736         0x9224, 0xffffffff, 0x00110010,
737         0x9228, 0xffffffff, 0x000b000a,
738         0x922c, 0xffffffff, 0x000d000c,
739         0x9230, 0xffffffff, 0x00000010,
740         0x9234, 0xffffffff, 0x000f000e,
741         0x9238, 0xffffffff, 0x00120011,
742         0x923c, 0xffffffff, 0x000c000b,
743         0x9240, 0xffffffff, 0x000e000d,
744         0x9244, 0xffffffff, 0x00000011,
745         0x9248, 0xffffffff, 0x0010000f,
746         0x924c, 0xffffffff, 0x00130012,
747         0x9250, 0xffffffff, 0x000d000c,
748         0x9254, 0xffffffff, 0x000f000e,
749         0x9258, 0xffffffff, 0x00100013,
750         0x925c, 0xffffffff, 0x00120011,
751         0x9260, 0xffffffff, 0x00150014,
752         0x9150, 0xffffffff, 0x96940200,
753         0x8708, 0xffffffff, 0x00900100,
754         0xc478, 0xffffffff, 0x00000080,
755         0xc404, 0xffffffff, 0x0020003f,
756         0x30, 0xffffffff, 0x0000001c,
757         0x34, 0x000f0000, 0x000f0000,
758         0x160c, 0xffffffff, 0x00000100,
759         0x1024, 0xffffffff, 0x00000100,
760         0x102c, 0x00000101, 0x00000000,
761         0x20a8, 0xffffffff, 0x00000104,
762         0x55e4, 0xff000fff, 0x00000100,
763         0x55e8, 0x00000001, 0x00000001,
764         0x2f50, 0x00000001, 0x00000001,
765         0x30cc, 0xc0000fff, 0x00000104,
766         0xc1e4, 0x00000001, 0x00000001,
767         0xd0c0, 0xfffffff0, 0x00000100,
768         0xd8c0, 0xfffffff0, 0x00000100
769 };
770
/*
 * Clockgating (MGCG/CGCG) init sequence for Verde, applied by
 * si_init_golden_registers() through radeon_program_register_sequence().
 * Three u32s per entry -- presumably {register offset, and-mask, value};
 * NOTE(review): confirm the triplet semantics against
 * radeon_program_register_sequence().
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
870
/*
 * Clockgating (MGCG/CGCG) init sequence for Oland; same triplet layout
 * as verde_mgcg_cgcg_init and applied the same way by
 * si_init_golden_registers().  Diverges from the Verde table in the
 * 0x91c4..0x91d4 rows.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
950
/*
 * Clockgating (MGCG/CGCG) init sequence for Hainan; same triplet layout
 * as the other *_mgcg_cgcg_init tables.  Matches the Oland table except
 * that the 0x102c, 0x55e4 and 0x55e8 entries are absent.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1027
1028 static u32 verde_pg_init[] =
1029 {
1030         0x353c, 0xffffffff, 0x40000,
1031         0x3538, 0xffffffff, 0x200010ff,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x7007,
1038         0x3538, 0xffffffff, 0x300010ff,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x400000,
1045         0x3538, 0xffffffff, 0x100010ff,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x120200,
1052         0x3538, 0xffffffff, 0x500010ff,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x1e1e16,
1059         0x3538, 0xffffffff, 0x600010ff,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x171f1e,
1066         0x3538, 0xffffffff, 0x700010ff,
1067         0x353c, 0xffffffff, 0x0,
1068         0x353c, 0xffffffff, 0x0,
1069         0x353c, 0xffffffff, 0x0,
1070         0x353c, 0xffffffff, 0x0,
1071         0x353c, 0xffffffff, 0x0,
1072         0x353c, 0xffffffff, 0x0,
1073         0x3538, 0xffffffff, 0x9ff,
1074         0x3500, 0xffffffff, 0x0,
1075         0x3504, 0xffffffff, 0x10000800,
1076         0x3504, 0xffffffff, 0xf,
1077         0x3504, 0xffffffff, 0xf,
1078         0x3500, 0xffffffff, 0x4,
1079         0x3504, 0xffffffff, 0x1000051e,
1080         0x3504, 0xffffffff, 0xffff,
1081         0x3504, 0xffffffff, 0xffff,
1082         0x3500, 0xffffffff, 0x8,
1083         0x3504, 0xffffffff, 0x80500,
1084         0x3500, 0xffffffff, 0x12,
1085         0x3504, 0xffffffff, 0x9050c,
1086         0x3500, 0xffffffff, 0x1d,
1087         0x3504, 0xffffffff, 0xb052c,
1088         0x3500, 0xffffffff, 0x2a,
1089         0x3504, 0xffffffff, 0x1053e,
1090         0x3500, 0xffffffff, 0x2d,
1091         0x3504, 0xffffffff, 0x10546,
1092         0x3500, 0xffffffff, 0x30,
1093         0x3504, 0xffffffff, 0xa054e,
1094         0x3500, 0xffffffff, 0x3c,
1095         0x3504, 0xffffffff, 0x1055f,
1096         0x3500, 0xffffffff, 0x3f,
1097         0x3504, 0xffffffff, 0x10567,
1098         0x3500, 0xffffffff, 0x42,
1099         0x3504, 0xffffffff, 0x1056f,
1100         0x3500, 0xffffffff, 0x45,
1101         0x3504, 0xffffffff, 0x10572,
1102         0x3500, 0xffffffff, 0x48,
1103         0x3504, 0xffffffff, 0x20575,
1104         0x3500, 0xffffffff, 0x4c,
1105         0x3504, 0xffffffff, 0x190801,
1106         0x3500, 0xffffffff, 0x67,
1107         0x3504, 0xffffffff, 0x1082a,
1108         0x3500, 0xffffffff, 0x6a,
1109         0x3504, 0xffffffff, 0x1b082d,
1110         0x3500, 0xffffffff, 0x87,
1111         0x3504, 0xffffffff, 0x310851,
1112         0x3500, 0xffffffff, 0xba,
1113         0x3504, 0xffffffff, 0x891,
1114         0x3500, 0xffffffff, 0xbc,
1115         0x3504, 0xffffffff, 0x893,
1116         0x3500, 0xffffffff, 0xbe,
1117         0x3504, 0xffffffff, 0x20895,
1118         0x3500, 0xffffffff, 0xc2,
1119         0x3504, 0xffffffff, 0x20899,
1120         0x3500, 0xffffffff, 0xc6,
1121         0x3504, 0xffffffff, 0x2089d,
1122         0x3500, 0xffffffff, 0xca,
1123         0x3504, 0xffffffff, 0x8a1,
1124         0x3500, 0xffffffff, 0xcc,
1125         0x3504, 0xffffffff, 0x8a3,
1126         0x3500, 0xffffffff, 0xce,
1127         0x3504, 0xffffffff, 0x308a5,
1128         0x3500, 0xffffffff, 0xd3,
1129         0x3504, 0xffffffff, 0x6d08cd,
1130         0x3500, 0xffffffff, 0x142,
1131         0x3504, 0xffffffff, 0x2000095a,
1132         0x3504, 0xffffffff, 0x1,
1133         0x3500, 0xffffffff, 0x144,
1134         0x3504, 0xffffffff, 0x301f095b,
1135         0x3500, 0xffffffff, 0x165,
1136         0x3504, 0xffffffff, 0xc094d,
1137         0x3500, 0xffffffff, 0x173,
1138         0x3504, 0xffffffff, 0xf096d,
1139         0x3500, 0xffffffff, 0x184,
1140         0x3504, 0xffffffff, 0x15097f,
1141         0x3500, 0xffffffff, 0x19b,
1142         0x3504, 0xffffffff, 0xc0998,
1143         0x3500, 0xffffffff, 0x1a9,
1144         0x3504, 0xffffffff, 0x409a7,
1145         0x3500, 0xffffffff, 0x1af,
1146         0x3504, 0xffffffff, 0xcdc,
1147         0x3500, 0xffffffff, 0x1b1,
1148         0x3504, 0xffffffff, 0x800,
1149         0x3508, 0xffffffff, 0x6c9b2000,
1150         0x3510, 0xfc00, 0x2000,
1151         0x3544, 0xffffffff, 0xfc0,
1152         0x28d4, 0x00000100, 0x100
1153 };
1154
1155 static void si_init_golden_registers(struct radeon_device *rdev)
1156 {
1157         switch (rdev->family) {
1158         case CHIP_TAHITI:
1159                 radeon_program_register_sequence(rdev,
1160                                                  tahiti_golden_registers,
1161                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1162                 radeon_program_register_sequence(rdev,
1163                                                  tahiti_golden_rlc_registers,
1164                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1165                 radeon_program_register_sequence(rdev,
1166                                                  tahiti_mgcg_cgcg_init,
1167                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1168                 radeon_program_register_sequence(rdev,
1169                                                  tahiti_golden_registers2,
1170                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1171                 break;
1172         case CHIP_PITCAIRN:
1173                 radeon_program_register_sequence(rdev,
1174                                                  pitcairn_golden_registers,
1175                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1176                 radeon_program_register_sequence(rdev,
1177                                                  pitcairn_golden_rlc_registers,
1178                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1179                 radeon_program_register_sequence(rdev,
1180                                                  pitcairn_mgcg_cgcg_init,
1181                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1182                 break;
1183         case CHIP_VERDE:
1184                 radeon_program_register_sequence(rdev,
1185                                                  verde_golden_registers,
1186                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1187                 radeon_program_register_sequence(rdev,
1188                                                  verde_golden_rlc_registers,
1189                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1190                 radeon_program_register_sequence(rdev,
1191                                                  verde_mgcg_cgcg_init,
1192                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1193                 radeon_program_register_sequence(rdev,
1194                                                  verde_pg_init,
1195                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1196                 break;
1197         case CHIP_OLAND:
1198                 radeon_program_register_sequence(rdev,
1199                                                  oland_golden_registers,
1200                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1201                 radeon_program_register_sequence(rdev,
1202                                                  oland_golden_rlc_registers,
1203                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1204                 radeon_program_register_sequence(rdev,
1205                                                  oland_mgcg_cgcg_init,
1206                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1207                 break;
1208         case CHIP_HAINAN:
1209                 radeon_program_register_sequence(rdev,
1210                                                  hainan_golden_registers,
1211                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1212                 radeon_program_register_sequence(rdev,
1213                                                  hainan_golden_registers2,
1214                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1215                 radeon_program_register_sequence(rdev,
1216                                                  hainan_mgcg_cgcg_init,
1217                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1218                 break;
1219         default:
1220                 break;
1221         }
1222 }
1223
1224 #define PCIE_BUS_CLK                10000
1225 #define TCLK                        (PCIE_BUS_CLK / 10)
1226
1227 /**
1228  * si_get_xclk - get the xclk
1229  *
1230  * @rdev: radeon_device pointer
1231  *
1232  * Returns the reference clock used by the gfx engine
1233  * (SI).
1234  */
1235 u32 si_get_xclk(struct radeon_device *rdev)
1236 {
1237         u32 reference_clock = rdev->clock.spll.reference_freq;
1238         u32 tmp;
1239
1240         tmp = RREG32(CG_CLKPIN_CNTL_2);
1241         if (tmp & MUX_TCLK_TO_XCLK)
1242                 return TCLK;
1243
1244         tmp = RREG32(CG_CLKPIN_CNTL);
1245         if (tmp & XTALIN_DIVIDE)
1246                 return reference_clock / 4;
1247
1248         return reference_clock;
1249 }
1250
1251 /* get temperature in millidegrees */
1252 int si_get_temp(struct radeon_device *rdev)
1253 {
1254         u32 temp;
1255         int actual_temp = 0;
1256
1257         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1258                 CTF_TEMP_SHIFT;
1259
1260         if (temp & 0x200)
1261                 actual_temp = 255;
1262         else
1263                 actual_temp = temp & 0x1ff;
1264
1265         actual_temp = (actual_temp * 1000);
1266
1267         return actual_temp;
1268 }
1269
/* Number of {index, data} pairs in each *_io_mc_regs table below. */
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before the MC ucode is streamed in (Tahiti).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1310
/*
 * MC_SEQ_IO_DEBUG pairs for Pitcairn; identical to tahiti_io_mc_regs
 * except for the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1349
/*
 * MC_SEQ_IO_DEBUG pairs for Verde; identical to tahiti_io_mc_regs
 * except for the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1388
/*
 * MC_SEQ_IO_DEBUG pairs for Oland; identical to tahiti_io_mc_regs
 * except for the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1427
/*
 * MC_SEQ_IO_DEBUG pairs for Hainan; identical to tahiti_io_mc_regs
 * except for the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1466
1467 /* ucode loading */
1468 static int si_mc_load_microcode(struct radeon_device *rdev)
1469 {
1470         const __be32 *fw_data;
1471         u32 running, blackout = 0;
1472         u32 *io_mc_regs;
1473         int i, ucode_size, regs_size;
1474
1475         if (!rdev->mc_fw)
1476                 return -EINVAL;
1477
1478         switch (rdev->family) {
1479         case CHIP_TAHITI:
1480                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_PITCAIRN:
1485                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1486                 ucode_size = SI_MC_UCODE_SIZE;
1487                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1488                 break;
1489         case CHIP_VERDE:
1490         default:
1491                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1492                 ucode_size = SI_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         case CHIP_OLAND:
1496                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1497                 ucode_size = OLAND_MC_UCODE_SIZE;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         case CHIP_HAINAN:
1501                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1502                 ucode_size = OLAND_MC_UCODE_SIZE;
1503                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1504                 break;
1505         }
1506
1507         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1508
1509         if (running == 0) {
1510                 if (running) {
1511                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1512                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1513                 }
1514
1515                 /* reset the engine and set to writable */
1516                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1517                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1518
1519                 /* load mc io regs */
1520                 for (i = 0; i < regs_size; i++) {
1521                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1522                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1523                 }
1524                 /* load the MC ucode */
1525                 fw_data = (const __be32 *)rdev->mc_fw->data;
1526                 for (i = 0; i < ucode_size; i++)
1527                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1528
1529                 /* put the engine back into the active state */
1530                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1533
1534                 /* wait for training to complete */
1535                 for (i = 0; i < rdev->usec_timeout; i++) {
1536                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1537                                 break;
1538                         udelay(1);
1539                 }
1540                 for (i = 0; i < rdev->usec_timeout; i++) {
1541                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1542                                 break;
1543                         udelay(1);
1544                 }
1545
1546                 if (running)
1547                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1548         }
1549
1550         return 0;
1551 }
1552
1553 static int si_init_microcode(struct radeon_device *rdev)
1554 {
1555         const char *chip_name;
1556         const char *rlc_chip_name;
1557         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1558         size_t smc_req_size;
1559         char fw_name[30];
1560         int err;
1561
1562         DRM_DEBUG("\n");
1563
1564         switch (rdev->family) {
1565         case CHIP_TAHITI:
1566                 chip_name = "TAHITI";
1567                 rlc_chip_name = "TAHITI";
1568                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1570                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1571                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_PITCAIRN:
1576                 chip_name = "PITCAIRN";
1577                 rlc_chip_name = "PITCAIRN";
1578                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1579                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1580                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1581                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1583                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1584                 break;
1585         case CHIP_VERDE:
1586                 chip_name = "VERDE";
1587                 rlc_chip_name = "VERDE";
1588                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1589                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1590                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1591                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1592                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1593                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1594                 break;
1595         case CHIP_OLAND:
1596                 chip_name = "OLAND";
1597                 rlc_chip_name = "OLAND";
1598                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1599                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1600                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1601                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1602                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1603                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1604                 break;
1605         case CHIP_HAINAN:
1606                 chip_name = "HAINAN";
1607                 rlc_chip_name = "HAINAN";
1608                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1609                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1610                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1611                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1612                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1613                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1614                 break;
1615         default: BUG();
1616         }
1617
1618         DRM_INFO("Loading %s Microcode\n", chip_name);
1619
1620         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1621         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1622         if (err)
1623                 goto out;
1624         if (rdev->pfp_fw->size != pfp_req_size) {
1625                 printk(KERN_ERR
1626                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627                        rdev->pfp_fw->size, fw_name);
1628                 err = -EINVAL;
1629                 goto out;
1630         }
1631
1632         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1633         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1634         if (err)
1635                 goto out;
1636         if (rdev->me_fw->size != me_req_size) {
1637                 printk(KERN_ERR
1638                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1639                        rdev->me_fw->size, fw_name);
1640                 err = -EINVAL;
1641         }
1642
1643         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1644         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1645         if (err)
1646                 goto out;
1647         if (rdev->ce_fw->size != ce_req_size) {
1648                 printk(KERN_ERR
1649                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1650                        rdev->ce_fw->size, fw_name);
1651                 err = -EINVAL;
1652         }
1653
1654         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1655         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1656         if (err)
1657                 goto out;
1658         if (rdev->rlc_fw->size != rlc_req_size) {
1659                 printk(KERN_ERR
1660                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1661                        rdev->rlc_fw->size, fw_name);
1662                 err = -EINVAL;
1663         }
1664
1665         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1666         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1667         if (err)
1668                 goto out;
1669         if (rdev->mc_fw->size != mc_req_size) {
1670                 printk(KERN_ERR
1671                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1672                        rdev->mc_fw->size, fw_name);
1673                 err = -EINVAL;
1674         }
1675
1676         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1677         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1678         if (err) {
1679                 printk(KERN_ERR
1680                        "smc: error loading firmware \"%s\"\n",
1681                        fw_name);
1682                 release_firmware(rdev->smc_fw);
1683                 rdev->smc_fw = NULL;
1684                 err = 0;
1685         } else if (rdev->smc_fw->size != smc_req_size) {
1686                 printk(KERN_ERR
1687                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1688                        rdev->smc_fw->size, fw_name);
1689                 err = -EINVAL;
1690         }
1691
1692 out:
1693         if (err) {
1694                 if (err != -EINVAL)
1695                         printk(KERN_ERR
1696                                "si_cp: Failed to load firmware \"%s\"\n",
1697                                fw_name);
1698                 release_firmware(rdev->pfp_fw);
1699                 rdev->pfp_fw = NULL;
1700                 release_firmware(rdev->me_fw);
1701                 rdev->me_fw = NULL;
1702                 release_firmware(rdev->ce_fw);
1703                 rdev->ce_fw = NULL;
1704                 release_firmware(rdev->rlc_fw);
1705                 rdev->rlc_fw = NULL;
1706                 release_firmware(rdev->mc_fw);
1707                 rdev->mc_fw = NULL;
1708                 release_firmware(rdev->smc_fw);
1709                 rdev->smc_fw = NULL;
1710         }
1711         return err;
1712 }
1713
1714 /* watermark setup */
1715 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1716                                    struct radeon_crtc *radeon_crtc,
1717                                    struct drm_display_mode *mode,
1718                                    struct drm_display_mode *other_mode)
1719 {
1720         u32 tmp, buffer_alloc, i;
1721         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1722         /*
1723          * Line Buffer Setup
1724          * There are 3 line buffers, each one shared by 2 display controllers.
1725          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1726          * the display controllers.  The paritioning is done via one of four
1727          * preset allocations specified in bits 21:20:
1728          *  0 - half lb
1729          *  2 - whole lb, other crtc must be disabled
1730          */
1731         /* this can get tricky if we have two large displays on a paired group
1732          * of crtcs.  Ideally for multiple large displays we'd assign them to
1733          * non-linked crtcs for maximum line buffer allocation.
1734          */
1735         if (radeon_crtc->base.enabled && mode) {
1736                 if (other_mode) {
1737                         tmp = 0; /* 1/2 */
1738                         buffer_alloc = 1;
1739                 } else {
1740                         tmp = 2; /* whole */
1741                         buffer_alloc = 2;
1742                 }
1743         } else {
1744                 tmp = 0;
1745                 buffer_alloc = 0;
1746         }
1747
1748         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1749                DC_LB_MEMORY_CONFIG(tmp));
1750
1751         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1752                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1753         for (i = 0; i < rdev->usec_timeout; i++) {
1754                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1755                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1756                         break;
1757                 udelay(1);
1758         }
1759
1760         if (radeon_crtc->base.enabled && mode) {
1761                 switch (tmp) {
1762                 case 0:
1763                 default:
1764                         return 4096 * 2;
1765                 case 2:
1766                         return 8192 * 2;
1767                 }
1768         }
1769
1770         /* controller not enabled, so no lb used */
1771         return 0;
1772 }
1773
1774 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1775 {
1776         u32 tmp = RREG32(MC_SHARED_CHMAP);
1777
1778         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1779         case 0:
1780         default:
1781                 return 1;
1782         case 1:
1783                 return 2;
1784         case 2:
1785                 return 4;
1786         case 3:
1787                 return 8;
1788         case 4:
1789                 return 3;
1790         case 5:
1791                 return 6;
1792         case 6:
1793                 return 10;
1794         case 7:
1795                 return 12;
1796         case 8:
1797                 return 16;
1798         }
1799 }
1800
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1816
1817 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1818 {
1819         /* Calculate raw DRAM Bandwidth */
1820         fixed20_12 dram_efficiency; /* 0.7 */
1821         fixed20_12 yclk, dram_channels, bandwidth;
1822         fixed20_12 a;
1823
1824         a.full = dfixed_const(1000);
1825         yclk.full = dfixed_const(wm->yclk);
1826         yclk.full = dfixed_div(yclk, a);
1827         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1828         a.full = dfixed_const(10);
1829         dram_efficiency.full = dfixed_const(7);
1830         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1831         bandwidth.full = dfixed_mul(dram_channels, yclk);
1832         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1833
1834         return dfixed_trunc(bandwidth);
1835 }
1836
1837 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1838 {
1839         /* Calculate DRAM Bandwidth and the part allocated to display. */
1840         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1841         fixed20_12 yclk, dram_channels, bandwidth;
1842         fixed20_12 a;
1843
1844         a.full = dfixed_const(1000);
1845         yclk.full = dfixed_const(wm->yclk);
1846         yclk.full = dfixed_div(yclk, a);
1847         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1848         a.full = dfixed_const(10);
1849         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1850         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1851         bandwidth.full = dfixed_mul(dram_channels, yclk);
1852         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1853
1854         return dfixed_trunc(bandwidth);
1855 }
1856
1857 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1858 {
1859         /* Calculate the display Data return Bandwidth */
1860         fixed20_12 return_efficiency; /* 0.8 */
1861         fixed20_12 sclk, bandwidth;
1862         fixed20_12 a;
1863
1864         a.full = dfixed_const(1000);
1865         sclk.full = dfixed_const(wm->sclk);
1866         sclk.full = dfixed_div(sclk, a);
1867         a.full = dfixed_const(10);
1868         return_efficiency.full = dfixed_const(8);
1869         return_efficiency.full = dfixed_div(return_efficiency, a);
1870         a.full = dfixed_const(32);
1871         bandwidth.full = dfixed_mul(a, sclk);
1872         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1873
1874         return dfixed_trunc(bandwidth);
1875 }
1876
/* DMIF request size in bytes; a fixed 32 for this display block. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1881
/* DMIF request bandwidth: the smaller of the display-clock-limited and
 * engine-clock-limited request rates, derated by a 0.8 efficiency factor.
 */
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	/* display-clock side: half a request (16 bytes) per disp_clk */
	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	/* engine-clock side: one full request (32 bytes) per sclk */
	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	/* the slower of the two paths bounds the request rate */
	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
1913
1914 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1915 {
1916         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1917         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1918         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1919         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1920
1921         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1922 }
1923
1924 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1925 {
1926         /* Calculate the display mode Average Bandwidth
1927          * DisplayMode should contain the source and destination dimensions,
1928          * timing, etc.
1929          */
1930         fixed20_12 bpp;
1931         fixed20_12 line_time;
1932         fixed20_12 src_width;
1933         fixed20_12 bandwidth;
1934         fixed20_12 a;
1935
1936         a.full = dfixed_const(1000);
1937         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1938         line_time.full = dfixed_div(line_time, a);
1939         bpp.full = dfixed_const(wm->bytes_per_pixel);
1940         src_width.full = dfixed_const(wm->src_width);
1941         bandwidth.full = dfixed_mul(src_width, bpp);
1942         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1943         bandwidth.full = dfixed_div(bandwidth, line_time);
1944
1945         return dfixed_trunc(bandwidth);
1946 }
1947
/* Worst-case latency (in ns, presumably — derived from ns inputs) that
 * the line buffer must hide for this head, extended by any shortfall of
 * the line fill rate versus the active display time.  Returns 0 when no
 * heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calcualte the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* strong downscaling, many scaler taps or interlace need 4 source
	 * lines per destination line, otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the rate
	 * at which the DMIF buffer can be drained */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk / 1000 * bytes_per_pixel: the consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* lb fill bandwidth is the minimum of all three limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the required source lines at that fill rate */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2010
2011 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2012 {
2013         if (dce6_average_bandwidth(wm) <=
2014             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2015                 return true;
2016         else
2017                 return false;
2018 };
2019
2020 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2021 {
2022         if (dce6_average_bandwidth(wm) <=
2023             (dce6_available_bandwidth(wm) / wm->num_heads))
2024                 return true;
2025         else
2026                 return false;
2027 };
2028
2029 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2030 {
2031         u32 lb_partitions = wm->lb_size / wm->src_width;
2032         u32 line_time = wm->active_time + wm->blank_time;
2033         u32 latency_tolerant_lines;
2034         u32 latency_hiding;
2035         fixed20_12 a;
2036
2037         a.full = dfixed_const(1);
2038         if (wm->vsc.full > a.full)
2039                 latency_tolerant_lines = 1;
2040         else {
2041                 if (lb_partitions <= (wm->vtaps + 1))
2042                         latency_tolerant_lines = 1;
2043                 else
2044                         latency_tolerant_lines = 2;
2045         }
2046
2047         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2048
2049         if (dce6_latency_watermark(wm) <= latency_hiding)
2050                 return true;
2051         else
2052                 return false;
2053 }
2054
/**
 * dce6_program_watermarks - program the display watermarks for one crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer size allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Builds dce6_wm_params for the high- and low-clock cases, derives
 * latency watermarks A/B and the priority marks, and writes them to the
 * DPG arbitration/latency and PRIORITY_A/B_CNT registers for this crtc.
 * The computed values are also cached on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns, line time clamped to the 16-bit
		 * register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * clock/1000 * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2214
/**
 * dce6_bandwidth_update - recompute line buffer split and watermarks
 * @rdev: radeon_device pointer
 *
 * Counts the active heads, then walks the crtcs in pairs — each pair
 * shares one line buffer — adjusting the split and reprogramming the
 * watermarks for both members of the pair.
 * NOTE(review): the i+1 access assumes rdev->num_crtc is even and
 * mode_info.crtcs[] is fully populated — confirm against asic setup.
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
2237
2238 /*
2239  * Core functions
2240  */
2241 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2242 {
2243         const u32 num_tile_mode_states = 32;
2244         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2245
2246         switch (rdev->config.si.mem_row_size_in_kb) {
2247         case 1:
2248                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2249                 break;
2250         case 2:
2251         default:
2252                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2253                 break;
2254         case 4:
2255                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2256                 break;
2257         }
2258
2259         if ((rdev->family == CHIP_TAHITI) ||
2260             (rdev->family == CHIP_PITCAIRN)) {
2261                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2262                         switch (reg_offset) {
2263                         case 0:  /* non-AA compressed depth or any compressed stencil */
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2266                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2267                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2268                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2269                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2272                                 break;
2273                         case 1:  /* 2xAA/4xAA compressed depth only */
2274                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2276                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2277                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2278                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2279                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2282                                 break;
2283                         case 2:  /* 8xAA compressed depth only */
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2287                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2288                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2289                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2292                                 break;
2293                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2297                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2298                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2299                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2302                                 break;
2303                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2304                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2305                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2306                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2307                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2308                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2309                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2312                                 break;
2313                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2314                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2316                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2317                                                  TILE_SPLIT(split_equal_to_row_size) |
2318                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2319                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2320                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2321                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2322                                 break;
2323                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2324                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2326                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2327                                                  TILE_SPLIT(split_equal_to_row_size) |
2328                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2329                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2332                                 break;
2333                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2334                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2336                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2337                                                  TILE_SPLIT(split_equal_to_row_size) |
2338                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2339                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2342                                 break;
2343                         case 8:  /* 1D and 1D Array Surfaces */
2344                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2345                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2347                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2348                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2349                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2352                                 break;
2353                         case 9:  /* Displayable maps. */
2354                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2357                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2359                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2361                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2362                                 break;
2363                         case 10:  /* Display 8bpp. */
2364                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2367                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2368                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2369                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2371                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2372                                 break;
2373                         case 11:  /* Display 16bpp. */
2374                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2376                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2377                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2378                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2379                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2382                                 break;
2383                         case 12:  /* Display 32bpp. */
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2387                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2389                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2391                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2392                                 break;
2393                         case 13:  /* Thin. */
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2397                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2398                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2399                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2402                                 break;
2403                         case 14:  /* Thin 8 bpp. */
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2406                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2407                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2408                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2409                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2412                                 break;
2413                         case 15:  /* Thin 16 bpp. */
2414                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2416                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2417                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2418                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2419                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2421                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2422                                 break;
2423                         case 16:  /* Thin 32 bpp. */
2424                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2427                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2428                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2429                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2431                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2432                                 break;
2433                         case 17:  /* Thin 64 bpp. */
2434                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2436                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2437                                                  TILE_SPLIT(split_equal_to_row_size) |
2438                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2439                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2442                                 break;
2443                         case 21:  /* 8 bpp PRT. */
2444                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2446                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2447                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2448                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2449                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2450                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2452                                 break;
2453                         case 22:  /* 16 bpp PRT */
2454                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2456                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2457                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2458                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2459                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2462                                 break;
2463                         case 23:  /* 32 bpp PRT */
2464                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2466                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2467                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2468                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2469                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2471                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2472                                 break;
2473                         case 24:  /* 64 bpp PRT */
2474                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2475                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2476                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2477                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2478                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2479                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2482                                 break;
2483                         case 25:  /* 128 bpp PRT */
2484                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2486                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2487                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2488                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2489                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2492                                 break;
2493                         default:
2494                                 gb_tile_moden = 0;
2495                                 break;
2496                         }
2497                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499                 }
2500         } else if ((rdev->family == CHIP_VERDE) ||
2501                    (rdev->family == CHIP_OLAND) ||
2502                    (rdev->family == CHIP_HAINAN)) {
2503                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2504                         switch (reg_offset) {
2505                         case 0:  /* non-AA compressed depth or any compressed stencil */
2506                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2508                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2509                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2511                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2514                                 break;
2515                         case 1:  /* 2xAA/4xAA compressed depth only */
2516                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2518                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2519                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2520                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2521                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2524                                 break;
2525                         case 2:  /* 8xAA compressed depth only */
2526                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2528                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2529                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2530                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2531                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2533                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2534                                 break;
2535                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2536                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2539                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2540                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2541                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2544                                 break;
2545                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2546                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2548                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2549                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2550                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2551                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2554                                 break;
2555                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2559                                                  TILE_SPLIT(split_equal_to_row_size) |
2560                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2561                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2563                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2564                                 break;
2565                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2566                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2568                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2569                                                  TILE_SPLIT(split_equal_to_row_size) |
2570                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2571                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2573                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2574                                 break;
2575                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2576                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                  TILE_SPLIT(split_equal_to_row_size) |
2580                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2581                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2584                                 break;
2585                         case 8:  /* 1D and 1D Array Surfaces */
2586                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2587                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2589                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2590                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2591                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2593                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2594                                 break;
2595                         case 9:  /* Displayable maps. */
2596                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2599                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2600                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2601                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2603                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604                                 break;
2605                         case 10:  /* Display 8bpp. */
2606                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2609                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2610                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2611                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2614                                 break;
2615                         case 11:  /* Display 16bpp. */
2616                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2618                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2619                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2620                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2621                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2623                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2624                                 break;
2625                         case 12:  /* Display 32bpp. */
2626                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2629                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2630                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2631                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2634                                 break;
2635                         case 13:  /* Thin. */
2636                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2638                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2639                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2640                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2641                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2643                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2644                                 break;
2645                         case 14:  /* Thin 8 bpp. */
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2651                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2654                                 break;
2655                         case 15:  /* Thin 16 bpp. */
2656                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2658                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2660                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2661                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2663                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2664                                 break;
2665                         case 16:  /* Thin 32 bpp. */
2666                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2668                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2670                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2671                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2674                                 break;
2675                         case 17:  /* Thin 64 bpp. */
2676                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2678                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2679                                                  TILE_SPLIT(split_equal_to_row_size) |
2680                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2681                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2683                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2684                                 break;
2685                         case 21:  /* 8 bpp PRT. */
2686                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2691                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2694                                 break;
2695                         case 22:  /* 16 bpp PRT */
2696                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2698                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2699                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2700                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2701                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2704                                 break;
2705                         case 23:  /* 32 bpp PRT */
2706                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2708                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2709                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2711                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2713                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2714                                 break;
2715                         case 24:  /* 64 bpp PRT */
2716                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2718                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2719                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2720                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2721                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2724                                 break;
2725                         case 25:  /* 128 bpp PRT */
2726                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2727                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2728                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2729                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2730                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2731                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2734                                 break;
2735                         default:
2736                                 gb_tile_moden = 0;
2737                                 break;
2738                         }
2739                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2740                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2741                 }
2742         } else
2743                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2744 }
2745
2746 static void si_select_se_sh(struct radeon_device *rdev,
2747                             u32 se_num, u32 sh_num)
2748 {
2749         u32 data = INSTANCE_BROADCAST_WRITES;
2750
2751         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2752                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2753         else if (se_num == 0xffffffff)
2754                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2755         else if (sh_num == 0xffffffff)
2756                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2757         else
2758                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2759         WREG32(GRBM_GFX_INDEX, data);
2760 }
2761
2762 static u32 si_create_bitmask(u32 bit_width)
2763 {
2764         u32 i, mask = 0;
2765
2766         for (i = 0; i < bit_width; i++) {
2767                 mask <<= 1;
2768                 mask |= 1;
2769         }
2770         return mask;
2771 }
2772
2773 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2774 {
2775         u32 data, mask;
2776
2777         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2778         if (data & 1)
2779                 data &= INACTIVE_CUS_MASK;
2780         else
2781                 data = 0;
2782         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2783
2784         data >>= INACTIVE_CUS_SHIFT;
2785
2786         mask = si_create_bitmask(cu_per_sh);
2787
2788         return ~data & mask;
2789 }
2790
2791 static void si_setup_spi(struct radeon_device *rdev,
2792                          u32 se_num, u32 sh_per_se,
2793                          u32 cu_per_sh)
2794 {
2795         int i, j, k;
2796         u32 data, mask, active_cu;
2797
2798         for (i = 0; i < se_num; i++) {
2799                 for (j = 0; j < sh_per_se; j++) {
2800                         si_select_se_sh(rdev, i, j);
2801                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2802                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2803
2804                         mask = 1;
2805                         for (k = 0; k < 16; k++) {
2806                                 mask <<= k;
2807                                 if (active_cu & mask) {
2808                                         data &= ~mask;
2809                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2810                                         break;
2811                                 }
2812                         }
2813                 }
2814         }
2815         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2816 }
2817
2818 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2819                               u32 max_rb_num, u32 se_num,
2820                               u32 sh_per_se)
2821 {
2822         u32 data, mask;
2823
2824         data = RREG32(CC_RB_BACKEND_DISABLE);
2825         if (data & 1)
2826                 data &= BACKEND_DISABLE_MASK;
2827         else
2828                 data = 0;
2829         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2830
2831         data >>= BACKEND_DISABLE_SHIFT;
2832
2833         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2834
2835         return data & mask;
2836 }
2837
/*
 * si_setup_rb - configure the enabled render backends
 *
 * Builds a global bitmap of disabled RBs by querying each SE/SH pair,
 * inverts it into an enabled-RB bitmap, then programs
 * PA_SC_RASTER_CONFIG per shader engine with an RB mapping chosen from
 * the enable pattern of each SH.  Leaves GRBM indexing in broadcast
 * mode on return.
 *
 * NOTE(review): the per-SH bitmap width uses
 * TAHITI_RB_BITMAP_WIDTH_PER_SH for all SI parts — presumably the width
 * is the same across the family; confirm against the register spec.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect disabled-RB bits from every SE/SH into one bitmap */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: any RB not marked disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/*
	 * Program the raster config per SE.  Each SH consumes two bits of
	 * enabled_rbs; the RB_MAP value selected depends on which of the
	 * two RBs in that SH are alive (the MAP_3/MAP_2 pairing for
	 * cases 2 and 3 is intentional, not a typo — it matches the
	 * hardware packer routing).
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2885
/*
 * si_gpu_init - core GFX block initialization for SI asics
 *
 * Fills rdev->config.si with per-asic limits, derives the tiling
 * configuration from the MC arbiter settings, programs the global
 * address/tiling config registers, initializes the tiling mode table,
 * RBs and SPI, and applies HW defaults for the 3D engine.  The register
 * write sequence is order-dependent; do not reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-asic shader/tiling limits and FIFO sizes */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* Verde doubles as the fallback for unknown SI parts */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write keeps whatever defaults the VBIOS set */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the writes settle before anything else touches the GFX block */
	udelay(50);
}
3142
3143 /*
3144  * GPU scratch registers helpers function.
3145  */
3146 static void si_scratch_init(struct radeon_device *rdev)
3147 {
3148         int i;
3149
3150         rdev->scratch.num_reg = 7;
3151         rdev->scratch.reg_base = SCRATCH_REG0;
3152         for (i = 0; i < rdev->scratch.num_reg; i++) {
3153                 rdev->scratch.free[i] = true;
3154                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3155         }
3156 }
3157
/*
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC to flush the TC/L1/K$/I$ caches over GART,
 * followed by an EVENT_WRITE_EOP that writes the fence sequence number
 * to the fence GPU address and raises an interrupt when the caches
 * have been flushed.  The packet stream layout is fixed; do not
 * reorder the writes.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3184
3185 /*
3186  * IB stuff
3187  */
/*
 * si_ring_ib_execute - emit an indirect buffer on a CP ring
 * @rdev: radeon device
 * @ib: indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, preceded by rptr-save bookkeeping and,
 * for const IBs, a SWITCH_BUFFER.  For normal IBs a cache flush for
 * the IB's vmid is appended.  The next_rptr offsets (3/5 + 4 + 8)
 * count the dwords this function emits after the bookkeeping packet;
 * keep them in sync if the packet stream changes.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_CONFIG_REG + 4 IB + 8 flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA + 4 IB + 8 flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* dst: memory */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap IB fetches */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3244
3245 /*
3246  * CP.
3247  */
3248 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3249 {
3250         if (enable)
3251                 WREG32(CP_ME_CNTL, 0);
3252         else {
3253                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3254                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3255                 WREG32(SCRATCH_UMSK, 0);
3256                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3257                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3258                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3259         }
3260         udelay(50);
3261 }
3262
3263 static int si_cp_load_microcode(struct radeon_device *rdev)
3264 {
3265         const __be32 *fw_data;
3266         int i;
3267
3268         if (!rdev->me_fw || !rdev->pfp_fw)
3269                 return -EINVAL;
3270
3271         si_cp_enable(rdev, false);
3272
3273         /* PFP */
3274         fw_data = (const __be32 *)rdev->pfp_fw->data;
3275         WREG32(CP_PFP_UCODE_ADDR, 0);
3276         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3277                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3278         WREG32(CP_PFP_UCODE_ADDR, 0);
3279
3280         /* CE */
3281         fw_data = (const __be32 *)rdev->ce_fw->data;
3282         WREG32(CP_CE_UCODE_ADDR, 0);
3283         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3284                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3285         WREG32(CP_CE_UCODE_ADDR, 0);
3286
3287         /* ME */
3288         fw_data = (const __be32 *)rdev->me_fw->data;
3289         WREG32(CP_ME_RAM_WADDR, 0);
3290         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3291                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3292         WREG32(CP_ME_RAM_WADDR, 0);
3293
3294         WREG32(CP_PFP_UCODE_ADDR, 0);
3295         WREG32(CP_CE_UCODE_ADDR, 0);
3296         WREG32(CP_ME_RAM_WADDR, 0);
3297         WREG32(CP_ME_RAM_RADDR, 0);
3298         return 0;
3299 }
3300
3301 static int si_cp_start(struct radeon_device *rdev)
3302 {
3303         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3304         int r, i;
3305
3306         r = radeon_ring_lock(rdev, ring, 7 + 4);
3307         if (r) {
3308                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3309                 return r;
3310         }
3311         /* init the CP */
3312         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3313         radeon_ring_write(ring, 0x1);
3314         radeon_ring_write(ring, 0x0);
3315         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3316         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3317         radeon_ring_write(ring, 0);
3318         radeon_ring_write(ring, 0);
3319
3320         /* init the CE partitions */
3321         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3322         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3323         radeon_ring_write(ring, 0xc000);
3324         radeon_ring_write(ring, 0xe000);
3325         radeon_ring_unlock_commit(rdev, ring);
3326
3327         si_cp_enable(rdev, true);
3328
3329         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3330         if (r) {
3331                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3332                 return r;
3333         }
3334
3335         /* setup clear context state */
3336         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3337         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3338
3339         for (i = 0; i < si_default_size; i++)
3340                 radeon_ring_write(ring, si_default_state[i]);
3341
3342         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3343         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3344
3345         /* set clear context state */
3346         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3347         radeon_ring_write(ring, 0);
3348
3349         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3350         radeon_ring_write(ring, 0x00000316);
3351         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3352         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3353
3354         radeon_ring_unlock_commit(rdev, ring);
3355
3356         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3357                 ring = &rdev->ring[i];
3358                 r = radeon_ring_lock(rdev, ring, 2);
3359
3360                 /* clear the compute context state */
3361                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3362                 radeon_ring_write(ring, 0);
3363
3364                 radeon_ring_unlock_commit(rdev, ring);
3365         }
3366
3367         return 0;
3368 }
3369
3370 static void si_cp_fini(struct radeon_device *rdev)
3371 {
3372         struct radeon_ring *ring;
3373         si_cp_enable(rdev, false);
3374
3375         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3376         radeon_ring_fini(rdev, ring);
3377         radeon_scratch_free(rdev, ring->rptr_save_reg);
3378
3379         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3380         radeon_ring_fini(rdev, ring);
3381         radeon_scratch_free(rdev, ring->rptr_save_reg);
3382
3383         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3384         radeon_ring_fini(rdev, ring);
3385         radeon_scratch_free(rdev, ring->rptr_save_reg);
3386 }
3387
/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs ring 0 (gfx + compute) and rings 1/2 (compute only): ring
 * buffer size, read-pointer writeback address and ring base, then
 * starts the CP via si_cp_start() and ring-tests all three rings.
 * Returns 0 on success, or the gfx ring test error on failure; a
 * compute ring test failure only marks that ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	/* rewriting tmp drops RB_RPTR_WR_ENA again now that the
	 * pointers have been reset */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* a dead gfx ring is fatal; mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3516
/**
 * si_gpu_check_soft_reset - inspect status registers for hung blocks
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM status registers and returns a
 * mask of RADEON_RESET_* flags for the blocks that report busy.
 * An MC-busy indication is cleared from the returned mask, since the
 * MC is most likely just busy rather than hung.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3597
/**
 * si_gpu_soft_reset - soft-reset the engines selected in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Disables PG/CG, stops the RLC, halts the CP and (if requested) the
 * DMA engines, stops the MC, then pulses the corresponding bits in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET and resumes the MC.
 * No-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3729
3730 int si_asic_reset(struct radeon_device *rdev)
3731 {
3732         u32 reset_mask;
3733
3734         reset_mask = si_gpu_check_soft_reset(rdev);
3735
3736         if (reset_mask)
3737                 r600_set_bios_scratch_engine_hung(rdev, true);
3738
3739         si_gpu_soft_reset(rdev, reset_mask);
3740
3741         reset_mask = si_gpu_check_soft_reset(rdev);
3742
3743         if (!reset_mask)
3744                 r600_set_bios_scratch_engine_hung(rdev, false);
3745
3746         return 0;
3747 }
3748
3749 /**
3750  * si_gfx_is_lockup - Check if the GFX engine is locked up
3751  *
3752  * @rdev: radeon_device pointer
3753  * @ring: radeon_ring structure holding ring information
3754  *
3755  * Check if the GFX engine is locked up.
3756  * Returns true if the engine appears to be locked up, false if not.
3757  */
3758 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3759 {
3760         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3761
3762         if (!(reset_mask & (RADEON_RESET_GFX |
3763                             RADEON_RESET_COMPUTE |
3764                             RADEON_RESET_CP))) {
3765                 radeon_ring_lockup_update(ring);
3766                 return false;
3767         }
3768         /* force CP activities */
3769         radeon_ring_force_activity(rdev, ring);
3770         return radeon_ring_test_lockup(rdev, ring);
3771 }
3772
3773 /* MC */
/**
 * si_mc_program - program the memory controller aperture registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC and display, programs
 * the system/VRAM aperture and FB location registers, then resumes
 * the MC and disables the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* MC registers must not change while clients are active */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in bits 31:16, start in bits 15:0, 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on SI: base 0, top below bottom disables it */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3824
3825 void si_vram_gtt_location(struct radeon_device *rdev,
3826                           struct radeon_mc *mc)
3827 {
3828         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3829                 /* leave room for at least 1024M GTT */
3830                 dev_warn(rdev->dev, "limiting VRAM\n");
3831                 mc->real_vram_size = 0xFFC0000000ULL;
3832                 mc->mc_vram_size = 0xFFC0000000ULL;
3833         }
3834         radeon_vram_location(rdev, &rdev->mc, 0);
3835         rdev->mc.gtt_base_align = 0;
3836         radeon_gtt_location(rdev, mc);
3837 }
3838
3839 static int si_mc_init(struct radeon_device *rdev)
3840 {
3841         u32 tmp;
3842         int chansize, numchan;
3843
3844         /* Get VRAM informations */
3845         rdev->mc.vram_is_ddr = true;
3846         tmp = RREG32(MC_ARB_RAMCFG);
3847         if (tmp & CHANSIZE_OVERRIDE) {
3848                 chansize = 16;
3849         } else if (tmp & CHANSIZE_MASK) {
3850                 chansize = 64;
3851         } else {
3852                 chansize = 32;
3853         }
3854         tmp = RREG32(MC_SHARED_CHMAP);
3855         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3856         case 0:
3857         default:
3858                 numchan = 1;
3859                 break;
3860         case 1:
3861                 numchan = 2;
3862                 break;
3863         case 2:
3864                 numchan = 4;
3865                 break;
3866         case 3:
3867                 numchan = 8;
3868                 break;
3869         case 4:
3870                 numchan = 3;
3871                 break;
3872         case 5:
3873                 numchan = 6;
3874                 break;
3875         case 6:
3876                 numchan = 10;
3877                 break;
3878         case 7:
3879                 numchan = 12;
3880                 break;
3881         case 8:
3882                 numchan = 16;
3883                 break;
3884         }
3885         rdev->mc.vram_width = numchan * chansize;
3886         /* Could aper size report 0 ? */
3887         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3888         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3889         /* size in MB on si */
3890         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3891         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3892         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3893         si_vram_gtt_location(rdev, &rdev->mc);
3894         radeon_update_bandwidth_info(rdev);
3895
3896         return 0;
3897 }
3898
3899 /*
3900  * GART
3901  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and all VM TLBs
 *
 * @rdev: radeon_device pointer
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3910
/**
 * si_pcie_gart_enable - bring up the GART / VM page tables
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and L2 cache,
 * sets up VM context 0 for the GTT range and enables VM contexts
 * 1-15 for per-process page tables, then flushes the TLBs.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT aperture with the GART table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared as part of
	 * context0 setup -- purpose not derivable from this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3995
/**
 * si_pcie_gart_disable - turn off the GART / VM and unpin the table
 *
 * @rdev: radeon_device pointer
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is deliberately absent */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE is deliberately absent */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4014
/**
 * si_pcie_gart_fini - full GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware first, then frees the table and the
 * GART bookkeeping (order matters: hardware must stop using the
 * table before it is freed).
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4021
4022 /* vm parser */
4023 static bool si_vm_reg_valid(u32 reg)
4024 {
4025         /* context regs are fine */
4026         if (reg >= 0x28000)
4027                 return true;
4028
4029         /* check config regs */
4030         switch (reg) {
4031         case GRBM_GFX_INDEX:
4032         case CP_STRMOUT_CNTL:
4033         case VGT_VTX_VECT_EJECT_REG:
4034         case VGT_CACHE_INVALIDATION:
4035         case VGT_ESGS_RING_SIZE:
4036         case VGT_GSVS_RING_SIZE:
4037         case VGT_GS_VERTEX_REUSE:
4038         case VGT_PRIMITIVE_TYPE:
4039         case VGT_INDEX_TYPE:
4040         case VGT_NUM_INDICES:
4041         case VGT_NUM_INSTANCES:
4042         case VGT_TF_RING_SIZE:
4043         case VGT_HS_OFFCHIP_PARAM:
4044         case VGT_TF_MEMORY_BASE:
4045         case PA_CL_ENHANCE:
4046         case PA_SU_LINE_STIPPLE_VALUE:
4047         case PA_SC_LINE_STIPPLE_STATE:
4048         case PA_SC_ENHANCE:
4049         case SQC_CACHES:
4050         case SPI_STATIC_THREAD_MGMT_1:
4051         case SPI_STATIC_THREAD_MGMT_2:
4052         case SPI_STATIC_THREAD_MGMT_3:
4053         case SPI_PS_MAX_WAVE_ID:
4054         case SPI_CONFIG_CNTL:
4055         case SPI_CONFIG_CNTL_1:
4056         case TA_CNTL_AUX:
4057                 return true;
4058         default:
4059                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4060                 return false;
4061         }
4062 }
4063
/**
 * si_vm_packet3_ce_check - validate a PACKET3 destined for the CE
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Only a whitelist of constant-engine opcodes is permitted in a
 * VM-mode CE IB.  Returns 0 if the opcode is allowed, -EINVAL
 * otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4085
4086 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4087 {
4088         u32 start_reg, reg, i;
4089         u32 command = ib[idx + 4];
4090         u32 info = ib[idx + 1];
4091         u32 idx_value = ib[idx];
4092         if (command & PACKET3_CP_DMA_CMD_SAS) {
4093                 /* src address space is register */
4094                 if (((info & 0x60000000) >> 29) == 0) {
4095                         start_reg = idx_value << 2;
4096                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4097                                 reg = start_reg;
4098                                 if (!si_vm_reg_valid(reg)) {
4099                                         DRM_ERROR("CP DMA Bad SRC register\n");
4100                                         return -EINVAL;
4101                                 }
4102                         } else {
4103                                 for (i = 0; i < (command & 0x1fffff); i++) {
4104                                         reg = start_reg + (4 * i);
4105                                         if (!si_vm_reg_valid(reg)) {
4106                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4107                                                 return -EINVAL;
4108                                         }
4109                                 }
4110                         }
4111                 }
4112         }
4113         if (command & PACKET3_CP_DMA_CMD_DAS) {
4114                 /* dst address space is register */
4115                 if (((info & 0x00300000) >> 20) == 0) {
4116                         start_reg = ib[idx + 2];
4117                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4118                                 reg = start_reg;
4119                                 if (!si_vm_reg_valid(reg)) {
4120                                         DRM_ERROR("CP DMA Bad DST register\n");
4121                                         return -EINVAL;
4122                                 }
4123                         } else {
4124                                 for (i = 0; i < (command & 0x1fffff); i++) {
4125                                         reg = start_reg + (4 * i);
4126                                 if (!si_vm_reg_valid(reg)) {
4127                                                 DRM_ERROR("CP DMA Bad DST register\n");
4128                                                 return -EINVAL;
4129                                         }
4130                                 }
4131                         }
4132                 }
4133         }
4134         return 0;
4135 }
4136
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a GFX ring VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: packet to check (pkt->idx indexes the packet header)
 *
 * Whitelist-check the opcode; for opcodes that can write registers,
 * validate every target register with si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes need no per-register validation; pass through */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register mode: all data hits one reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* NOTE(review): pkt->count < 2 would wrap this
				 * unsigned bound - presumably a well-formed
				 * WRITE_DATA always has count >= 2; verify. */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the config reg window, then each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4254
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a compute ring VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: packet to check (pkt->idx indexes the packet header)
 *
 * Same structure as si_vm_packet3_gfx_check() but with the smaller
 * opcode whitelist allowed on the compute (CP1/CP2) rings.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes need no per-register validation; pass through */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register mode: all data hits one reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* NOTE(review): pkt->count < 2 would wrap this
				 * unsigned bound - presumably a well-formed
				 * WRITE_DATA always has count >= 2; verify. */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4342
/**
 * si_ib_parse - walk and validate every packet of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Iterates over the IB dword stream, dispatching each PACKET3 to the
 * CE, GFX or compute checker depending on the IB type and target ring.
 * Type-0 packets and unknown packet types are rejected outright.
 *
 * Returns 0 if every packet passes validation, a negative error
 * code from the first failing packet otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes are never allowed in a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type 2 is a one-dword filler packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			/* const IBs are consumed by the CE, others by the DE */
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* advance past header + payload (count is N-1 encoded) */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4394
4395 /*
4396  * vm
4397  */
/**
 * si_vm_init - initialize VM manager parameters for SI
 *
 * @rdev: radeon_device pointer
 *
 * Sets the number of hardware VM contexts and the VRAM base offset
 * used by the VM manager.
 *
 * Returns 0 (cannot fail).
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4407
/**
 * si_vm_fini - tear down VM manager state
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to clean up on SI; stub kept to satisfy the asic interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4411
4412 /**
4413  * si_vm_decode_fault - print human readable fault info
4414  *
4415  * @rdev: radeon_device pointer
4416  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4417  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4418  *
4419  * Print human readable fault information (SI).
4420  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fields packed into the fault status register */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* memory client id -> block name mapping differs between Tahiti
	 * and the other SI parts, hence the two lookup tables */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client id layout */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4675
4676 /**
4677  * si_vm_set_page - update the page tables using the CP
4678  *
4679  * @rdev: radeon_device pointer
4680  * @ib: indirect buffer to fill with commands
4681  * @pe: addr of the page entry
4682  * @addr: dst addr to write into pe
4683  * @count: number of page entries to update
4684  * @incr: increase next addr by incr bytes
4685  * @flags: access flags
4686  *
4687  * Update the page tables using the CP (SI).
4688  */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	u32 r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			/* 2 header/address dwords + 2 dwords per 64-bit PTE,
			 * clamped to the max WRITE_DATA payload size */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			/* 64-bit destination address of the page entries */
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			/* emit one 64-bit PTE per loop iteration */
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
	}
}
4730
/**
 * si_vm_flush - emit a VM flush on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Writes the new page directory base for the VM context, flushes the
 * HDP cache, invalidates the VM context's TLB, and syncs the PFP to
 * the ME so the PFP does not fetch through stale translations.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4773
4774 /*
4775  *  Power and clock gating
4776  */
4777 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4778 {
4779         int i;
4780
4781         for (i = 0; i < rdev->usec_timeout; i++) {
4782                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4783                         break;
4784                 udelay(1);
4785         }
4786
4787         for (i = 0; i < rdev->usec_timeout; i++) {
4788                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4789                         break;
4790                 udelay(1);
4791         }
4792 }
4793
4794 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4795                                          bool enable)
4796 {
4797         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4798         u32 mask;
4799         int i;
4800
4801         if (enable)
4802                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4803         else
4804                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4805         WREG32(CP_INT_CNTL_RING0, tmp);
4806
4807         if (!enable) {
4808                 /* read a gfx register */
4809                 tmp = RREG32(DB_DEPTH_INFO);
4810
4811                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4812                 for (i = 0; i < rdev->usec_timeout; i++) {
4813                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4814                                 break;
4815                         udelay(1);
4816                 }
4817         }
4818 }
4819
4820 static void si_set_uvd_dcm(struct radeon_device *rdev,
4821                            bool sw_mode)
4822 {
4823         u32 tmp, tmp2;
4824
4825         tmp = RREG32(UVD_CGC_CTRL);
4826         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4827         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4828
4829         if (sw_mode) {
4830                 tmp &= ~0x7ffff800;
4831                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4832         } else {
4833                 tmp |= 0x7ffff800;
4834                 tmp2 = 0;
4835         }
4836
4837         WREG32(UVD_CGC_CTRL, tmp);
4838         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4839 }
4840
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	/* hard-wired true: only the si_set_uvd_dcm() path ever runs;
	 * the else branch is kept for reference/debugging */
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		/* software mode: clear the DCM bit entirely */
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4853
4854 static u32 si_halt_rlc(struct radeon_device *rdev)
4855 {
4856         u32 data, orig;
4857
4858         orig = data = RREG32(RLC_CNTL);
4859
4860         if (data & RLC_ENABLE) {
4861                 data &= ~RLC_ENABLE;
4862                 WREG32(RLC_CNTL, data);
4863
4864                 si_wait_for_rlc_serdes(rdev);
4865         }
4866
4867         return orig;
4868 }
4869
4870 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4871 {
4872         u32 tmp;
4873
4874         tmp = RREG32(RLC_CNTL);
4875         if (tmp != rlc)
4876                 WREG32(RLC_CNTL, rlc);
4877 }
4878
4879 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4880 {
4881         u32 data, orig;
4882
4883         orig = data = RREG32(DMA_PG);
4884         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4885                 data |= PG_CNTL_ENABLE;
4886         else
4887                 data &= ~PG_CNTL_ENABLE;
4888         if (orig != data)
4889                 WREG32(DMA_PG, data);
4890 }
4891
4892 static void si_init_dma_pg(struct radeon_device *rdev)
4893 {
4894         u32 tmp;
4895
4896         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4897         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4898
4899         for (tmp = 0; tmp < 5; tmp++)
4900                 WREG32(DMA_PGFSM_WRITE, 0);
4901 }
4902
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* program the power-gating thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		/* enable gfx power gating */
		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		/* let the RLC gate power automatically */
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		/* disable automatic power gating */
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* NOTE(review): the result of this read is intentionally
		 * discarded - presumably the read itself wakes the gfx
		 * block out of the gated state; confirm before changing. */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4927
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	/* point the RLC at the save/restore buffer (256-byte aligned) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	/* point the RLC at the clear-state buffer (256-byte aligned) */
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* set the GRBM sample interval, clear the post-sample delay */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4947
4948 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4949 {
4950         u32 mask = 0, tmp, tmp1;
4951         int i;
4952
4953         si_select_se_sh(rdev, se, sh);
4954         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4955         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4956         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4957
4958         tmp &= 0xffff0000;
4959
4960         tmp |= tmp1;
4961         tmp >>= 16;
4962
4963         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4964                 mask <<= 1;
4965                 mask |= 1;
4966         }
4967
4968         return (~tmp) & mask;
4969 }
4970
4971 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4972 {
4973         u32 i, j, k, active_cu_number = 0;
4974         u32 mask, counter, cu_bitmap;
4975         u32 tmp = 0;
4976
4977         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4978                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4979                         mask = 1;
4980                         cu_bitmap = 0;
4981                         counter  = 0;
4982                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4983                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4984                                         if (counter < 2)
4985                                                 cu_bitmap |= mask;
4986                                         counter++;
4987                                 }
4988                                 mask <<= 1;
4989                         }
4990
4991                         active_cu_number += counter;
4992                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4993                 }
4994         }
4995
4996         WREG32(RLC_PG_AO_CU_MASK, tmp);
4997
4998         tmp = RREG32(RLC_MAX_PG_CU);
4999         tmp &= ~MAX_PU_CU_MASK;
5000         tmp |= MAX_PU_CU(active_cu_number);
5001         WREG32(RLC_MAX_PG_CU, tmp);
5002 }
5003
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC, broadcast the serdes write to all SE/SH
		 * instances, then restore the saved RLC state */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads; presumably to let the CB clock
		 * settle before gating is turned off — not documented here */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5043
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * plus optional CP memory light sleep.  The serdes broadcast writes
 * must happen with the RLC halted.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally let CP memories drop into light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits so gating can take effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast to every SE/SH serdes instance */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force clocks on via the override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take CP memories back out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5099
5100 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5101                                bool enable)
5102 {
5103         u32 orig, data, tmp;
5104
5105         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5106                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5107                 tmp |= 0x3fff;
5108                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5109
5110                 orig = data = RREG32(UVD_CGC_CTRL);
5111                 data |= DCM;
5112                 if (orig != data)
5113                         WREG32(UVD_CGC_CTRL, data);
5114
5115                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5116                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5117         } else {
5118                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5119                 tmp &= ~0x3fff;
5120                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5121
5122                 orig = data = RREG32(UVD_CGC_CTRL);
5123                 data &= ~DCM;
5124                 if (orig != data)
5125                         WREG32(UVD_CGC_CTRL, data);
5126
5127                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5128                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5129         }
5130 }
5131
/* Memory-controller clock-gating control registers; each one carries
 * both the MC_CG_ENABLE and MC_LS_ENABLE bits toggled by
 * si_enable_mc_mgcg() and si_enable_mc_ls() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5144
5145 static void si_enable_mc_ls(struct radeon_device *rdev,
5146                             bool enable)
5147 {
5148         int i;
5149         u32 orig, data;
5150
5151         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5152                 orig = data = RREG32(mc_cg_registers[i]);
5153                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5154                         data |= MC_LS_ENABLE;
5155                 else
5156                         data &= ~MC_LS_ENABLE;
5157                 if (data != orig)
5158                         WREG32(mc_cg_registers[i], data);
5159         }
5160 }
5161
5162 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5163                                bool enable)
5164 {
5165         int i;
5166         u32 orig, data;
5167
5168         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5169                 orig = data = RREG32(mc_cg_registers[i]);
5170                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5171                         data |= MC_CG_ENABLE;
5172                 else
5173                         data &= ~MC_CG_ENABLE;
5174                 if (data != orig)
5175                         WREG32(mc_cg_registers[i], data);
5176         }
5177 }
5178
5179 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5180                                bool enable)
5181 {
5182         u32 orig, data, offset;
5183         int i;
5184
5185         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5186                 for (i = 0; i < 2; i++) {
5187                         if (i == 0)
5188                                 offset = DMA0_REGISTER_OFFSET;
5189                         else
5190                                 offset = DMA1_REGISTER_OFFSET;
5191                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5192                         data &= ~MEM_POWER_OVERRIDE;
5193                         if (data != orig)
5194                                 WREG32(DMA_POWER_CNTL + offset, data);
5195                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5196                 }
5197         } else {
5198                 for (i = 0; i < 2; i++) {
5199                         if (i == 0)
5200                                 offset = DMA0_REGISTER_OFFSET;
5201                         else
5202                                 offset = DMA1_REGISTER_OFFSET;
5203                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5204                         data |= MEM_POWER_OVERRIDE;
5205                         if (data != orig)
5206                                 WREG32(DMA_POWER_CNTL + offset, data);
5207
5208                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5209                         data = 0xff000000;
5210                         if (data != orig)
5211                                 WREG32(DMA_CLK_CTRL + offset, data);
5212                 }
5213         }
5214 }
5215
5216 static void si_enable_bif_mgls(struct radeon_device *rdev,
5217                                bool enable)
5218 {
5219         u32 orig, data;
5220
5221         orig = data = RREG32_PCIE(PCIE_CNTL2);
5222
5223         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5224                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5225                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5226         else
5227                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5228                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5229
5230         if (orig != data)
5231                 WREG32_PCIE(PCIE_CNTL2, data);
5232 }
5233
5234 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5235                                bool enable)
5236 {
5237         u32 orig, data;
5238
5239         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5240
5241         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5242                 data &= ~CLOCK_GATING_DIS;
5243         else
5244                 data |= CLOCK_GATING_DIS;
5245
5246         if (orig != data)
5247                 WREG32(HDP_HOST_PATH_CNTL, data);
5248 }
5249
5250 static void si_enable_hdp_ls(struct radeon_device *rdev,
5251                              bool enable)
5252 {
5253         u32 orig, data;
5254
5255         orig = data = RREG32(HDP_MEM_POWER_LS);
5256
5257         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5258                 data |= HDP_LS_ENABLE;
5259         else
5260                 data &= ~HDP_LS_ENABLE;
5261
5262         if (orig != data)
5263                 WREG32(HDP_MEM_POWER_LS, data);
5264 }
5265
5266 void si_update_cg(struct radeon_device *rdev,
5267                   u32 block, bool enable)
5268 {
5269         if (block & RADEON_CG_BLOCK_GFX) {
5270                 si_enable_gui_idle_interrupt(rdev, false);
5271                 /* order matters! */
5272                 if (enable) {
5273                         si_enable_mgcg(rdev, true);
5274                         si_enable_cgcg(rdev, true);
5275                 } else {
5276                         si_enable_cgcg(rdev, false);
5277                         si_enable_mgcg(rdev, false);
5278                 }
5279                 si_enable_gui_idle_interrupt(rdev, true);
5280         }
5281
5282         if (block & RADEON_CG_BLOCK_MC) {
5283                 si_enable_mc_mgcg(rdev, enable);
5284                 si_enable_mc_ls(rdev, enable);
5285         }
5286
5287         if (block & RADEON_CG_BLOCK_SDMA) {
5288                 si_enable_dma_mgcg(rdev, enable);
5289         }
5290
5291         if (block & RADEON_CG_BLOCK_BIF) {
5292                 si_enable_bif_mgls(rdev, enable);
5293         }
5294
5295         if (block & RADEON_CG_BLOCK_UVD) {
5296                 if (rdev->has_uvd) {
5297                         si_enable_uvd_mgcg(rdev, enable);
5298                 }
5299         }
5300
5301         if (block & RADEON_CG_BLOCK_HDP) {
5302                 si_enable_hdp_mgcg(rdev, enable);
5303                 si_enable_hdp_ls(rdev, enable);
5304         }
5305 }
5306
5307 static void si_init_cg(struct radeon_device *rdev)
5308 {
5309         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5310                             RADEON_CG_BLOCK_MC |
5311                             RADEON_CG_BLOCK_SDMA |
5312                             RADEON_CG_BLOCK_BIF |
5313                             RADEON_CG_BLOCK_HDP), true);
5314         if (rdev->has_uvd) {
5315                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5316                 si_init_uvd_internal_cg(rdev);
5317         }
5318 }
5319
5320 static void si_fini_cg(struct radeon_device *rdev)
5321 {
5322         if (rdev->has_uvd) {
5323                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5324         }
5325         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5326                             RADEON_CG_BLOCK_MC |
5327                             RADEON_CG_BLOCK_SDMA |
5328                             RADEON_CG_BLOCK_BIF |
5329                             RADEON_CG_BLOCK_HDP), false);
5330 }
5331
5332 u32 si_get_csb_size(struct radeon_device *rdev)
5333 {
5334         u32 count = 0;
5335         const struct cs_section_def *sect = NULL;
5336         const struct cs_extent_def *ext = NULL;
5337
5338         if (rdev->rlc.cs_data == NULL)
5339                 return 0;
5340
5341         /* begin clear state */
5342         count += 2;
5343         /* context control state */
5344         count += 3;
5345
5346         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5347                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5348                         if (sect->id == SECT_CONTEXT)
5349                                 count += 2 + ext->reg_count;
5350                         else
5351                                 return 0;
5352                 }
5353         }
5354         /* pa_sc_raster_config */
5355         count += 3;
5356         /* end clear state */
5357         count += 2;
5358         /* clear state */
5359         count += 2;
5360
5361         return count;
5362 }
5363
/* Fill @buffer with the PM4 packet stream the RLC replays to restore
 * the golden context register state.  The layout must exactly match
 * the dword count returned by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* emit every context-register extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* context regs are offsets from 0xa000 */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				/* unsupported section; abort the stream */
				return;
			}
		}
	}

	/* per-asic PA_SC_RASTER_CONFIG value */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5422
5423 static void si_init_pg(struct radeon_device *rdev)
5424 {
5425         if (rdev->pg_flags) {
5426                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5427                         si_init_dma_pg(rdev);
5428                 }
5429                 si_init_ao_cu_mask(rdev);
5430                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5431                         si_init_gfx_cgpg(rdev);
5432                 }
5433                 si_enable_dma_pg(rdev, true);
5434                 si_enable_gfx_cgpg(rdev, true);
5435         } else {
5436                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5437                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5438         }
5439 }
5440
5441 static void si_fini_pg(struct radeon_device *rdev)
5442 {
5443         if (rdev->pg_flags) {
5444                 si_enable_dma_pg(rdev, false);
5445                 si_enable_gfx_cgpg(rdev, false);
5446         }
5447 }
5448
5449 /*
5450  * RLC
5451  */
5452 void si_rlc_reset(struct radeon_device *rdev)
5453 {
5454         u32 tmp = RREG32(GRBM_SOFT_RESET);
5455
5456         tmp |= SOFT_RESET_RLC;
5457         WREG32(GRBM_SOFT_RESET, tmp);
5458         udelay(50);
5459         tmp &= ~SOFT_RESET_RLC;
5460         WREG32(GRBM_SOFT_RESET, tmp);
5461         udelay(50);
5462 }
5463
/* Stop the RLC micro-engine and wait for outstanding serdes commands
 * to drain before returning.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5472
/* Start the RLC micro-engine and give it a moment to come up. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5481
5482 static bool si_lbpw_supported(struct radeon_device *rdev)
5483 {
5484         u32 tmp;
5485
5486         /* Enable LBPW only for DDR3 */
5487         tmp = RREG32(MC_SEQ_MISC0);
5488         if ((tmp & 0xF0000000) == 0xB0000000)
5489                 return true;
5490         return false;
5491 }
5492
5493 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5494 {
5495         u32 tmp;
5496
5497         tmp = RREG32(RLC_LB_CNTL);
5498         if (enable)
5499                 tmp |= LOAD_BALANCE_ENABLE;
5500         else
5501                 tmp &= ~LOAD_BALANCE_ENABLE;
5502         WREG32(RLC_LB_CNTL, tmp);
5503
5504         if (!enable) {
5505                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5506                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5507         }
5508 }
5509
/* Load the RLC microcode and (re)start the RLC; also (re)initializes
 * powergating and clockgating.  Returns 0 on success, -EINVAL when no
 * RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear ring-list and load-balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* the firmware image is big-endian; write it one dword at a time */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5549
5550 static void si_enable_interrupts(struct radeon_device *rdev)
5551 {
5552         u32 ih_cntl = RREG32(IH_CNTL);
5553         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5554
5555         ih_cntl |= ENABLE_INTR;
5556         ih_rb_cntl |= IH_RB_ENABLE;
5557         WREG32(IH_CNTL, ih_cntl);
5558         WREG32(IH_RB_CNTL, ih_rb_cntl);
5559         rdev->ih.enabled = true;
5560 }
5561
5562 static void si_disable_interrupts(struct radeon_device *rdev)
5563 {
5564         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5565         u32 ih_cntl = RREG32(IH_CNTL);
5566
5567         ih_rb_cntl &= ~IH_RB_ENABLE;
5568         ih_cntl &= ~ENABLE_INTR;
5569         WREG32(IH_RB_CNTL, ih_rb_cntl);
5570         WREG32(IH_CNTL, ih_cntl);
5571         /* set rptr, wptr to 0 */
5572         WREG32(IH_RB_RPTR, 0);
5573         WREG32(IH_RB_WPTR, 0);
5574         rdev->ih.enabled = false;
5575         rdev->ih.rptr = 0;
5576 }
5577
/* Mask every interrupt source in hardware: CP rings, both DMA engines,
 * GRBM, per-crtc vblank/pageflip, and (when a display engine exists)
 * hotplug, preserving only bits that must survive the write.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep the context busy/empty enables, drop everything else */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank interrupt masks, only for crtcs that exist on this asic */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip (GRPH) interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* disable hpd interrupts but preserve the polarity setting */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5635
/* One-time interrupt handler init: allocate the IH ring, load the RLC,
 * program the IH ring buffer and writeback address, and enable irqs.
 * Returns 0 on success or a negative error code.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5706
5707 int si_irq_set(struct radeon_device *rdev)
5708 {
5709         u32 cp_int_cntl;
5710         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5711         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5712         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5713         u32 grbm_int_cntl = 0;
5714         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5715         u32 dma_cntl, dma_cntl1;
5716         u32 thermal_int = 0;
5717
5718         if (!rdev->irq.installed) {
5719                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5720                 return -EINVAL;
5721         }
5722         /* don't enable anything if the ih is disabled */
5723         if (!rdev->ih.enabled) {
5724                 si_disable_interrupts(rdev);
5725                 /* force the active interrupt state to all disabled */
5726                 si_disable_interrupt_state(rdev);
5727                 return 0;
5728         }
5729
5730         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5731                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5732
5733         if (!ASIC_IS_NODCE(rdev)) {
5734                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5735                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5736                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5737                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5738                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5739                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5740         }
5741
5742         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5743         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5744
5745         thermal_int = RREG32(CG_THERMAL_INT) &
5746                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5747
5748         /* enable CP interrupts on all rings */
5749         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5750                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5751                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5752         }
5753         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5754                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5755                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5756         }
5757         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5758                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5759                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5760         }
5761         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5762                 DRM_DEBUG("si_irq_set: sw int dma\n");
5763                 dma_cntl |= TRAP_ENABLE;
5764         }
5765
5766         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5767                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5768                 dma_cntl1 |= TRAP_ENABLE;
5769         }
5770         if (rdev->irq.crtc_vblank_int[0] ||
5771             atomic_read(&rdev->irq.pflip[0])) {
5772                 DRM_DEBUG("si_irq_set: vblank 0\n");
5773                 crtc1 |= VBLANK_INT_MASK;
5774         }
5775         if (rdev->irq.crtc_vblank_int[1] ||
5776             atomic_read(&rdev->irq.pflip[1])) {
5777                 DRM_DEBUG("si_irq_set: vblank 1\n");
5778                 crtc2 |= VBLANK_INT_MASK;
5779         }
5780         if (rdev->irq.crtc_vblank_int[2] ||
5781             atomic_read(&rdev->irq.pflip[2])) {
5782                 DRM_DEBUG("si_irq_set: vblank 2\n");
5783                 crtc3 |= VBLANK_INT_MASK;
5784         }
5785         if (rdev->irq.crtc_vblank_int[3] ||
5786             atomic_read(&rdev->irq.pflip[3])) {
5787                 DRM_DEBUG("si_irq_set: vblank 3\n");
5788                 crtc4 |= VBLANK_INT_MASK;
5789         }
5790         if (rdev->irq.crtc_vblank_int[4] ||
5791             atomic_read(&rdev->irq.pflip[4])) {
5792                 DRM_DEBUG("si_irq_set: vblank 4\n");
5793                 crtc5 |= VBLANK_INT_MASK;
5794         }
5795         if (rdev->irq.crtc_vblank_int[5] ||
5796             atomic_read(&rdev->irq.pflip[5])) {
5797                 DRM_DEBUG("si_irq_set: vblank 5\n");
5798                 crtc6 |= VBLANK_INT_MASK;
5799         }
5800         if (rdev->irq.hpd[0]) {
5801                 DRM_DEBUG("si_irq_set: hpd 1\n");
5802                 hpd1 |= DC_HPDx_INT_EN;
5803         }
5804         if (rdev->irq.hpd[1]) {
5805                 DRM_DEBUG("si_irq_set: hpd 2\n");
5806                 hpd2 |= DC_HPDx_INT_EN;
5807         }
5808         if (rdev->irq.hpd[2]) {
5809                 DRM_DEBUG("si_irq_set: hpd 3\n");
5810                 hpd3 |= DC_HPDx_INT_EN;
5811         }
5812         if (rdev->irq.hpd[3]) {
5813                 DRM_DEBUG("si_irq_set: hpd 4\n");
5814                 hpd4 |= DC_HPDx_INT_EN;
5815         }
5816         if (rdev->irq.hpd[4]) {
5817                 DRM_DEBUG("si_irq_set: hpd 5\n");
5818                 hpd5 |= DC_HPDx_INT_EN;
5819         }
5820         if (rdev->irq.hpd[5]) {
5821                 DRM_DEBUG("si_irq_set: hpd 6\n");
5822                 hpd6 |= DC_HPDx_INT_EN;
5823         }
5824
5825         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5826         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5827         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5828
5829         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5830         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5831
5832         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5833
5834         if (rdev->irq.dpm_thermal) {
5835                 DRM_DEBUG("dpm thermal\n");
5836                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5837         }
5838
5839         if (rdev->num_crtc >= 2) {
5840                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5841                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5842         }
5843         if (rdev->num_crtc >= 4) {
5844                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5845                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5846         }
5847         if (rdev->num_crtc >= 6) {
5848                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5849                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5850         }
5851
5852         if (rdev->num_crtc >= 2) {
5853                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5854                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5855         }
5856         if (rdev->num_crtc >= 4) {
5857                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5858                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5859         }
5860         if (rdev->num_crtc >= 6) {
5861                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5862                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5863         }
5864
5865         if (!ASIC_IS_NODCE(rdev)) {
5866                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5867                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5868                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5869                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5870                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5871                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5872         }
5873
5874         WREG32(CG_THERMAL_INT, thermal_int);
5875
5876         return 0;
5877 }
5878
5879 static inline void si_irq_ack(struct radeon_device *rdev)
5880 {
5881         u32 tmp;
5882
5883         if (ASIC_IS_NODCE(rdev))
5884                 return;
5885
5886         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5887         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5888         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5889         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5890         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5891         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5892         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5893         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5894         if (rdev->num_crtc >= 4) {
5895                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5896                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5897         }
5898         if (rdev->num_crtc >= 6) {
5899                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5900                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5901         }
5902
5903         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5904                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5905         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5906                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5907         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5908                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5909         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5910                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5911         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5912                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5913         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5914                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5915
5916         if (rdev->num_crtc >= 4) {
5917                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5918                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5919                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5920                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5921                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5922                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5923                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5924                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5925                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5926                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5927                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5928                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5929         }
5930
5931         if (rdev->num_crtc >= 6) {
5932                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5933                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5934                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5935                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5936                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5937                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5938                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5939                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5940                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5941                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5942                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5943                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5944         }
5945
5946         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5947                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5948                 tmp |= DC_HPDx_INT_ACK;
5949                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5950         }
5951         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5952                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5953                 tmp |= DC_HPDx_INT_ACK;
5954                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5955         }
5956         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5957                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5958                 tmp |= DC_HPDx_INT_ACK;
5959                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5960         }
5961         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5962                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5963                 tmp |= DC_HPDx_INT_ACK;
5964                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5965         }
5966         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5967                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5968                 tmp |= DC_HPDx_INT_ACK;
5969                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5970         }
5971         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5972                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5973                 tmp |= DC_HPDx_INT_ACK;
5974                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5975         }
5976 }
5977
/**
 * si_irq_disable - disable interrupts
 * @rdev: radeon_device pointer
 *
 * Disables interrupt generation, then (after a short delay to let
 * in-flight interrupts land) acks anything still pending and clears
 * the per-block interrupt enable state.  The ordering of these steps
 * matters: sources must be quiesced before the final state clear.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5986
/**
 * si_irq_suspend - quiesce interrupt handling for suspend
 * @rdev: radeon_device pointer
 *
 * Disables interrupts on the hw and stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5992
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Quiesces interrupts as for suspend, then frees the IH ring buffer.
 * Used on driver unload.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5998
5999 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6000 {
6001         u32 wptr, tmp;
6002
6003         if (rdev->wb.enabled)
6004                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6005         else
6006                 wptr = RREG32(IH_RB_WPTR);
6007
6008         if (wptr & RB_OVERFLOW) {
6009                 /* When a ring buffer overflow happen start parsing interrupt
6010                  * from the last not overwritten vector (wptr + 16). Hopefully
6011                  * this should allow us to catchup.
6012                  */
6013                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6014                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6015                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6016                 tmp = RREG32(IH_RB_CNTL);
6017                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6018                 WREG32(IH_RB_CNTL, tmp);
6019         }
6020         return (wptr & rdev->ih.ptr_mask);
6021 }
6022
6023 /*        SI IV Ring
6024  * Each IV ring entry is 128 bits:
6025  * [7:0]    - interrupt source id
6026  * [31:8]   - reserved
6027  * [59:32]  - interrupt source data
6028  * [63:60]  - reserved
6029  * [71:64]  - RINGID
6030  * [79:72]  - VMID
6031  * [127:80] - reserved
6032  */
/**
 * si_irq_process - interrupt handler
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring between rptr and wptr, decoding each IV ring
 * entry (source id, source data, ring id — see the format comment
 * above) and dispatching: vblank/vline per crtc, hotplug, VM
 * protection faults, CP/DMA fence completion and thermal events.
 * Hotplug and thermal work is deferred to workqueues.  Returns
 * IRQ_NONE if the IH is disabled, the device is shutting down, or
 * another thread already holds the IH lock; IRQ_HANDLED otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				/* only handle if si_irq_ack() latched the status bit;
				 * clear the cached bit once consumed.
				 */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring_id selects which CP ring's fence to process */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		/* each IV ring entry is 128 bits = 16 bytes */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6356
6357 /*
6358  * startup/shutdown callbacks
6359  */
/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Brings the hw up from scratch: PCIe link speed/ASPM, VRAM scratch,
 * MC programming, microcode load, GART, RLC, writeback, per-ring fence
 * drivers, IRQs, the CP/DMA/UVD rings, and finally the IB pool, VM
 * manager and audio.  The ordering below is significant (e.g. scratch
 * before MC, writeback before the fence drivers, IRQs before the rings).
 * Called from si_init() and si_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* fetch CP/RLC/MC microcode if any image is still missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		/* VERDE needs an explicit save/restore register list for RLC */
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring before the rings themselves
	 * are brought up: one GFX ring, two compute rings, two DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		/* UVD failure is not fatal: zero the ring size so the
		 * ring-init code below skips UVD entirely */
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* init the ring buffers: each call wires up the ring's writeback
	 * rptr slot and its rptr/wptr registers, plus the nop packet used
	 * to pad the ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			/* rptr offset 0: UVD rptr is not tracked via the
			 * writeback buffer here - TODO confirm */
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6551
6552 int si_resume(struct radeon_device *rdev)
6553 {
6554         int r;
6555
6556         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6557          * posting will perform necessary task to bring back GPU into good
6558          * shape.
6559          */
6560         /* post card */
6561         atom_asic_init(rdev->mode_info.atom_context);
6562
6563         /* init golden registers */
6564         si_init_golden_registers(rdev);
6565
6566         rdev->accel_working = true;
6567         r = si_startup(rdev);
6568         if (r) {
6569                 DRM_ERROR("si startup failed on resume\n");
6570                 rdev->accel_working = false;
6571                 return r;
6572         }
6573
6574         return r;
6575
6576 }
6577
/**
 * si_suspend - quiesce the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, the CP and DMA engines and UVD, then
 * disables power/clock gating, interrupts, writeback and finally the
 * GART.  Teardown order mirrors the bring-up order in si_startup().
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	/* gating must be undone before the IRQ/wb/GART teardown below */
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6595
6596 /* Plan is to move initialization in that function and use
6597  * helper function so that radeon_device_init pretty much
6598  * do nothing more than calling asic specific function. This
6599  * should also allow to remove a bunch of callback function
6600  * like vram_info.
6601  */
6602 int si_init(struct radeon_device *rdev)
6603 {
6604         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6605         int r;
6606
6607         /* Read BIOS */
6608         if (!radeon_get_bios(rdev)) {
6609                 if (ASIC_IS_AVIVO(rdev))
6610                         return -EINVAL;
6611         }
6612         /* Must be an ATOMBIOS */
6613         if (!rdev->is_atom_bios) {
6614                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6615                 return -EINVAL;
6616         }
6617         r = radeon_atombios_init(rdev);
6618         if (r)
6619                 return r;
6620
6621         /* Post card if necessary */
6622         if (!radeon_card_posted(rdev)) {
6623                 if (!rdev->bios) {
6624                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6625                         return -EINVAL;
6626                 }
6627                 DRM_INFO("GPU not posted. posting now...\n");
6628                 atom_asic_init(rdev->mode_info.atom_context);
6629         }
6630         /* init golden registers */
6631         si_init_golden_registers(rdev);
6632         /* Initialize scratch registers */
6633         si_scratch_init(rdev);
6634         /* Initialize surface registers */
6635         radeon_surface_init(rdev);
6636         /* Initialize clocks */
6637         radeon_get_clock_info(rdev->ddev);
6638
6639         /* Fence driver */
6640         r = radeon_fence_driver_init(rdev);
6641         if (r)
6642                 return r;
6643
6644         /* initialize memory controller */
6645         r = si_mc_init(rdev);
6646         if (r)
6647                 return r;
6648         /* Memory manager */
6649         r = radeon_bo_init(rdev);
6650         if (r)
6651                 return r;
6652
6653         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6654         ring->ring_obj = NULL;
6655         r600_ring_init(rdev, ring, 1024 * 1024);
6656
6657         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6658         ring->ring_obj = NULL;
6659         r600_ring_init(rdev, ring, 1024 * 1024);
6660
6661         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6662         ring->ring_obj = NULL;
6663         r600_ring_init(rdev, ring, 1024 * 1024);
6664
6665         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6666         ring->ring_obj = NULL;
6667         r600_ring_init(rdev, ring, 64 * 1024);
6668
6669         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6670         ring->ring_obj = NULL;
6671         r600_ring_init(rdev, ring, 64 * 1024);
6672
6673         if (rdev->has_uvd) {
6674                 r = radeon_uvd_init(rdev);
6675                 if (!r) {
6676                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6677                         ring->ring_obj = NULL;
6678                         r600_ring_init(rdev, ring, 4096);
6679                 }
6680         }
6681
6682         rdev->ih.ring_obj = NULL;
6683         r600_ih_ring_init(rdev, 64 * 1024);
6684
6685         r = r600_pcie_gart_init(rdev);
6686         if (r)
6687                 return r;
6688
6689         rdev->accel_working = true;
6690         r = si_startup(rdev);
6691         if (r) {
6692                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6693                 si_cp_fini(rdev);
6694                 cayman_dma_fini(rdev);
6695                 si_irq_fini(rdev);
6696                 sumo_rlc_fini(rdev);
6697                 radeon_wb_fini(rdev);
6698                 radeon_ib_pool_fini(rdev);
6699                 radeon_vm_manager_fini(rdev);
6700                 radeon_irq_kms_fini(rdev);
6701                 si_pcie_gart_fini(rdev);
6702                 rdev->accel_working = false;
6703         }
6704
6705         /* Don't start up if the MC ucode is missing.
6706          * The default clocks and voltages before the MC ucode
6707          * is loaded are not suffient for advanced operations.
6708          */
6709         if (!rdev->mc_fw) {
6710                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6711                 return -EINVAL;
6712         }
6713
6714         return 0;
6715 }
6716
/**
 * si_fini - asic specific driver and hw teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Full teardown at driver unload: stops CP/DMA, undoes power/clock
 * gating, tears down IRQs, RLC, writeback, VM, IB pool, UVD, GART,
 * scratch, GEM, fences, the memory manager and the BIOS copy.
 * Order mirrors si_init()/si_startup() and matters.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* rdev->bios was kmalloc'd by the BIOS fetch code; release it */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6742
6743 /**
6744  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6745  *
6746  * @rdev: radeon_device pointer
6747  *
6748  * Fetches a GPU clock counter snapshot (SI).
6749  * Returns the 64 bit clock counter snapshot.
6750  */
6751 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6752 {
6753         uint64_t clock;
6754
6755         mutex_lock(&rdev->gpu_clock_mutex);
6756         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6757         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6758                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6759         mutex_unlock(&rdev->gpu_clock_mutex);
6760         return clock;
6761 }
6762
/**
 * si_set_uvd_clocks - reprogram the UVD PLL (UPLL) for new VCLK/DCLK
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (assumed 10 kHz units - TODO confirm
 *        against radeon_uvd_calc_upll_dividers() callers)
 * @dclk: requested UVD decode clock (same units as @vclk)
 *
 * Switches VCLK/DCLK to the bus clock and bypasses the PLL; if both
 * requested clocks are zero the PLL is simply put to sleep in bypass
 * mode.  Otherwise the dividers are computed and the PLL is brought up
 * with the required sleep/reset toggle sequence before VCLK/DCLK are
 * switched back to the PLL outputs.  The register-write order below is
 * part of the hardware programming sequence and must not be changed.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* derive feedback and post dividers within the hw limits given here
	 * (ref freq range, VCO range, divider bounds) */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* NOTE(review): threshold appears to pick a VCO-dependent spare
	 * setting - confirm against the register spec */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6853
/**
 * si_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * No-op for IGPs, non-PCIE parts, or when the radeon.pcie_gen2 module
 * parameter is 0, and when the platform does not advertise 5.0/8.0 GT/s
 * support.  For a gen3-capable link that is not already at gen3, the
 * bridge and GPU are run through a link equalization retry loop before
 * the new target link speed is written and a software-initiated speed
 * change is kicked off.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends; the HAWD bit is restored
			 * after each equalization attempt below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* NOTE(review): (1 << 4) | (7 << 9) looks like the
				 * LNKCTL2 enter-compliance and compliance-preset/
				 * de-emphasis fields - confirm against PCIe spec */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the LNKCTL2 target link speed field (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the software-initiated speed change... */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* ...and wait for the hw to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7010
7011 static void si_program_aspm(struct radeon_device *rdev)
7012 {
7013         u32 data, orig;
7014         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7015         bool disable_clkreq = false;
7016
7017         if (radeon_aspm == 0)
7018                 return;
7019
7020         if (!(rdev->flags & RADEON_IS_PCIE))
7021                 return;
7022
7023         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7024         data &= ~LC_XMIT_N_FTS_MASK;
7025         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7026         if (orig != data)
7027                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7028
7029         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7030         data |= LC_GO_TO_RECOVERY;
7031         if (orig != data)
7032                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7033
7034         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7035         data |= P_IGNORE_EDB_ERR;
7036         if (orig != data)
7037                 WREG32_PCIE(PCIE_P_CNTL, data);
7038
7039         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7040         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7041         data |= LC_PMI_TO_L1_DIS;
7042         if (!disable_l0s)
7043                 data |= LC_L0S_INACTIVITY(7);
7044
7045         if (!disable_l1) {
7046                 data |= LC_L1_INACTIVITY(7);
7047                 data &= ~LC_PMI_TO_L1_DIS;
7048                 if (orig != data)
7049                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7050
7051                 if (!disable_plloff_in_l1) {
7052                         bool clk_req_support;
7053
7054                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7055                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7056                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7057                         if (orig != data)
7058                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7059
7060                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7061                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7062                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7063                         if (orig != data)
7064                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7065
7066                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7067                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7068                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7069                         if (orig != data)
7070                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7071
7072                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7073                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7074                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7075                         if (orig != data)
7076                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7077
7078                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7079                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7080                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7081                                 if (orig != data)
7082                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7083
7084                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7085                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7086                                 if (orig != data)
7087                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7088
7089                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7090                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7091                                 if (orig != data)
7092                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7093
7094                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7095                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7096                                 if (orig != data)
7097                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7098
7099                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7100                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7101                                 if (orig != data)
7102                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7103
7104                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7105                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7106                                 if (orig != data)
7107                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7108
7109                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7110                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7111                                 if (orig != data)
7112                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7113
7114                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7115                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7116                                 if (orig != data)
7117                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7118                         }
7119                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7120                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7121                         data |= LC_DYN_LANES_PWR_STATE(3);
7122                         if (orig != data)
7123                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7124
7125                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7126                         data &= ~LS2_EXIT_TIME_MASK;
7127                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7128                                 data |= LS2_EXIT_TIME(5);
7129                         if (orig != data)
7130                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7131
7132                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7133                         data &= ~LS2_EXIT_TIME_MASK;
7134                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7135                                 data |= LS2_EXIT_TIME(5);
7136                         if (orig != data)
7137                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7138
7139                         if (!disable_clkreq) {
7140                                 struct pci_dev *root = rdev->pdev->bus->self;
7141                                 u32 lnkcap;
7142
7143                                 clk_req_support = false;
7144                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7145                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7146                                         clk_req_support = true;
7147                         } else {
7148                                 clk_req_support = false;
7149                         }
7150
7151                         if (clk_req_support) {
7152                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7153                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7154                                 if (orig != data)
7155                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7156
7157                                 orig = data = RREG32(THM_CLK_CNTL);
7158                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7159                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7160                                 if (orig != data)
7161                                         WREG32(THM_CLK_CNTL, data);
7162
7163                                 orig = data = RREG32(MISC_CLK_CNTL);
7164                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7165                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7166                                 if (orig != data)
7167                                         WREG32(MISC_CLK_CNTL, data);
7168
7169                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7170                                 data &= ~BCLK_AS_XCLK;
7171                                 if (orig != data)
7172                                         WREG32(CG_CLKPIN_CNTL, data);
7173
7174                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7175                                 data &= ~FORCE_BIF_REFCLK_EN;
7176                                 if (orig != data)
7177                                         WREG32(CG_CLKPIN_CNTL_2, data);
7178
7179                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7180                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7181                                 data |= MPLL_CLKOUT_SEL(4);
7182                                 if (orig != data)
7183                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7184
7185                                 orig = data = RREG32(SPLL_CNTL_MODE);
7186                                 data &= ~SPLL_REFCLK_SEL_MASK;
7187                                 if (orig != data)
7188                                         WREG32(SPLL_CNTL_MODE, data);
7189                         }
7190                 }
7191         } else {
7192                 if (orig != data)
7193                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7194         }
7195
7196         orig = data = RREG32_PCIE(PCIE_CNTL2);
7197         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7198         if (orig != data)
7199                 WREG32_PCIE(PCIE_CNTL2, data);
7200
7201         if (!disable_l0s) {
7202                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7203                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7204                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7205                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7206                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7207                                 data &= ~LC_L0S_INACTIVITY_MASK;
7208                                 if (orig != data)
7209                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7210                         }
7211                 }
7212         }
7213 }