2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
/* Firmware images required per CIK family; registering them here lets
 * userspace tooling (e.g. initramfs generators) discover the dependency.
 */
/* Bonaire (CIK dGPU): CP (pfp/me/ce/mec), MC, RLC, SDMA and SMC firmware. */
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
/* Kaveri (CIK APU): no MC/SMC images listed — presumably because APUs
 * share system memory and power management differs; TODO confirm. */
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
/* Kabini (CIK APU). */
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/* Helpers shared with other asic files (r600.c, evergreen.c, sumo/si,
 * cik_sdma.c); declared extern here rather than via a header. */
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
/* NOTE(review): this prototype looks truncated by extraction — at least
 * one parameter line (likely "struct radeon_ib *ib, uint64_t pe,")
 * appears to be missing between the next two lines; verify upstream. */
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
/* Forward declarations for static helpers defined later in this file. */
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 static void cik_fini_pg(struct radeon_device *rdev);
81 static void cik_fini_cg(struct radeon_device *rdev);
/* NOTE(review): second parameter line of this prototype (bool enable)
 * appears to have been lost in extraction. */
82 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
85 /* get temperature in millidegrees */
86 int ci_get_temp(struct radeon_device *rdev)
91 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
97 actual_temp = temp & 0x1ff;
99 actual_temp = actual_temp * 1000;
104 /* get temperature in millidegrees */
105 int kv_get_temp(struct radeon_device *rdev)
110 temp = RREG32_SMC(0xC0300E0C);
113 actual_temp = (temp / 8) - 49;
117 actual_temp = actual_temp * 1000;
123 * Indirect registers accessor
125 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
130 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
131 WREG32(PCIE_INDEX, reg);
132 (void)RREG32(PCIE_INDEX);
133 r = RREG32(PCIE_DATA);
134 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
143 WREG32(PCIE_INDEX, reg);
144 (void)RREG32(PCIE_INDEX);
145 WREG32(PCIE_DATA, v);
146 (void)RREG32(PCIE_DATA);
147 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * RLC save/restore register list for Spectre (Kaveri).
 * Each entry appears to pack a selector in the upper 16 bits with a
 * dword register offset (byte offset >> 2) in the lower 16 bits.
 * NOTE(review): the embedded original line numbers skip values, which
 * suggests alternating entries (and the surrounding braces) were lost
 * in extraction — verify this table against the upstream kernel source.
 */
150 static const u32 spectre_rlc_save_restore_register_list[] =
152 (0x0e00 << 16) | (0xc12c >> 2),
154 (0x0e00 << 16) | (0xc140 >> 2),
156 (0x0e00 << 16) | (0xc150 >> 2),
158 (0x0e00 << 16) | (0xc15c >> 2),
160 (0x0e00 << 16) | (0xc168 >> 2),
162 (0x0e00 << 16) | (0xc170 >> 2),
164 (0x0e00 << 16) | (0xc178 >> 2),
166 (0x0e00 << 16) | (0xc204 >> 2),
168 (0x0e00 << 16) | (0xc2b4 >> 2),
170 (0x0e00 << 16) | (0xc2b8 >> 2),
172 (0x0e00 << 16) | (0xc2bc >> 2),
174 (0x0e00 << 16) | (0xc2c0 >> 2),
176 (0x0e00 << 16) | (0x8228 >> 2),
178 (0x0e00 << 16) | (0x829c >> 2),
180 (0x0e00 << 16) | (0x869c >> 2),
182 (0x0600 << 16) | (0x98f4 >> 2),
184 (0x0e00 << 16) | (0x98f8 >> 2),
186 (0x0e00 << 16) | (0x9900 >> 2),
188 (0x0e00 << 16) | (0xc260 >> 2),
190 (0x0e00 << 16) | (0x90e8 >> 2),
192 (0x0e00 << 16) | (0x3c000 >> 2),
194 (0x0e00 << 16) | (0x3c00c >> 2),
196 (0x0e00 << 16) | (0x8c1c >> 2),
198 (0x0e00 << 16) | (0x9700 >> 2),
200 (0x0e00 << 16) | (0xcd20 >> 2),
202 (0x4e00 << 16) | (0xcd20 >> 2),
204 (0x5e00 << 16) | (0xcd20 >> 2),
206 (0x6e00 << 16) | (0xcd20 >> 2),
208 (0x7e00 << 16) | (0xcd20 >> 2),
210 (0x8e00 << 16) | (0xcd20 >> 2),
212 (0x9e00 << 16) | (0xcd20 >> 2),
214 (0xae00 << 16) | (0xcd20 >> 2),
216 (0xbe00 << 16) | (0xcd20 >> 2),
218 (0x0e00 << 16) | (0x89bc >> 2),
220 (0x0e00 << 16) | (0x8900 >> 2),
223 (0x0e00 << 16) | (0xc130 >> 2),
225 (0x0e00 << 16) | (0xc134 >> 2),
227 (0x0e00 << 16) | (0xc1fc >> 2),
229 (0x0e00 << 16) | (0xc208 >> 2),
231 (0x0e00 << 16) | (0xc264 >> 2),
233 (0x0e00 << 16) | (0xc268 >> 2),
235 (0x0e00 << 16) | (0xc26c >> 2),
237 (0x0e00 << 16) | (0xc270 >> 2),
239 (0x0e00 << 16) | (0xc274 >> 2),
241 (0x0e00 << 16) | (0xc278 >> 2),
243 (0x0e00 << 16) | (0xc27c >> 2),
245 (0x0e00 << 16) | (0xc280 >> 2),
247 (0x0e00 << 16) | (0xc284 >> 2),
249 (0x0e00 << 16) | (0xc288 >> 2),
251 (0x0e00 << 16) | (0xc28c >> 2),
253 (0x0e00 << 16) | (0xc290 >> 2),
255 (0x0e00 << 16) | (0xc294 >> 2),
257 (0x0e00 << 16) | (0xc298 >> 2),
259 (0x0e00 << 16) | (0xc29c >> 2),
261 (0x0e00 << 16) | (0xc2a0 >> 2),
263 (0x0e00 << 16) | (0xc2a4 >> 2),
265 (0x0e00 << 16) | (0xc2a8 >> 2),
267 (0x0e00 << 16) | (0xc2ac >> 2),
269 (0x0e00 << 16) | (0xc2b0 >> 2),
271 (0x0e00 << 16) | (0x301d0 >> 2),
273 (0x0e00 << 16) | (0x30238 >> 2),
275 (0x0e00 << 16) | (0x30250 >> 2),
277 (0x0e00 << 16) | (0x30254 >> 2),
279 (0x0e00 << 16) | (0x30258 >> 2),
281 (0x0e00 << 16) | (0x3025c >> 2),
283 (0x4e00 << 16) | (0xc900 >> 2),
285 (0x5e00 << 16) | (0xc900 >> 2),
287 (0x6e00 << 16) | (0xc900 >> 2),
289 (0x7e00 << 16) | (0xc900 >> 2),
291 (0x8e00 << 16) | (0xc900 >> 2),
293 (0x9e00 << 16) | (0xc900 >> 2),
295 (0xae00 << 16) | (0xc900 >> 2),
297 (0xbe00 << 16) | (0xc900 >> 2),
299 (0x4e00 << 16) | (0xc904 >> 2),
301 (0x5e00 << 16) | (0xc904 >> 2),
303 (0x6e00 << 16) | (0xc904 >> 2),
305 (0x7e00 << 16) | (0xc904 >> 2),
307 (0x8e00 << 16) | (0xc904 >> 2),
309 (0x9e00 << 16) | (0xc904 >> 2),
311 (0xae00 << 16) | (0xc904 >> 2),
313 (0xbe00 << 16) | (0xc904 >> 2),
315 (0x4e00 << 16) | (0xc908 >> 2),
317 (0x5e00 << 16) | (0xc908 >> 2),
319 (0x6e00 << 16) | (0xc908 >> 2),
321 (0x7e00 << 16) | (0xc908 >> 2),
323 (0x8e00 << 16) | (0xc908 >> 2),
325 (0x9e00 << 16) | (0xc908 >> 2),
327 (0xae00 << 16) | (0xc908 >> 2),
329 (0xbe00 << 16) | (0xc908 >> 2),
331 (0x4e00 << 16) | (0xc90c >> 2),
333 (0x5e00 << 16) | (0xc90c >> 2),
335 (0x6e00 << 16) | (0xc90c >> 2),
337 (0x7e00 << 16) | (0xc90c >> 2),
339 (0x8e00 << 16) | (0xc90c >> 2),
341 (0x9e00 << 16) | (0xc90c >> 2),
343 (0xae00 << 16) | (0xc90c >> 2),
345 (0xbe00 << 16) | (0xc90c >> 2),
347 (0x4e00 << 16) | (0xc910 >> 2),
349 (0x5e00 << 16) | (0xc910 >> 2),
351 (0x6e00 << 16) | (0xc910 >> 2),
353 (0x7e00 << 16) | (0xc910 >> 2),
355 (0x8e00 << 16) | (0xc910 >> 2),
357 (0x9e00 << 16) | (0xc910 >> 2),
359 (0xae00 << 16) | (0xc910 >> 2),
361 (0xbe00 << 16) | (0xc910 >> 2),
363 (0x0e00 << 16) | (0xc99c >> 2),
365 (0x0e00 << 16) | (0x9834 >> 2),
367 (0x0000 << 16) | (0x30f00 >> 2),
369 (0x0001 << 16) | (0x30f00 >> 2),
371 (0x0000 << 16) | (0x30f04 >> 2),
373 (0x0001 << 16) | (0x30f04 >> 2),
375 (0x0000 << 16) | (0x30f08 >> 2),
377 (0x0001 << 16) | (0x30f08 >> 2),
379 (0x0000 << 16) | (0x30f0c >> 2),
381 (0x0001 << 16) | (0x30f0c >> 2),
383 (0x0600 << 16) | (0x9b7c >> 2),
385 (0x0e00 << 16) | (0x8a14 >> 2),
387 (0x0e00 << 16) | (0x8a18 >> 2),
389 (0x0600 << 16) | (0x30a00 >> 2),
391 (0x0e00 << 16) | (0x8bf0 >> 2),
393 (0x0e00 << 16) | (0x8bcc >> 2),
395 (0x0e00 << 16) | (0x8b24 >> 2),
397 (0x0e00 << 16) | (0x30a04 >> 2),
399 (0x0600 << 16) | (0x30a10 >> 2),
401 (0x0600 << 16) | (0x30a14 >> 2),
403 (0x0600 << 16) | (0x30a18 >> 2),
405 (0x0600 << 16) | (0x30a2c >> 2),
407 (0x0e00 << 16) | (0xc700 >> 2),
409 (0x0e00 << 16) | (0xc704 >> 2),
411 (0x0e00 << 16) | (0xc708 >> 2),
413 (0x0e00 << 16) | (0xc768 >> 2),
415 (0x0400 << 16) | (0xc770 >> 2),
417 (0x0400 << 16) | (0xc774 >> 2),
419 (0x0400 << 16) | (0xc778 >> 2),
421 (0x0400 << 16) | (0xc77c >> 2),
423 (0x0400 << 16) | (0xc780 >> 2),
425 (0x0400 << 16) | (0xc784 >> 2),
427 (0x0400 << 16) | (0xc788 >> 2),
429 (0x0400 << 16) | (0xc78c >> 2),
431 (0x0400 << 16) | (0xc798 >> 2),
433 (0x0400 << 16) | (0xc79c >> 2),
435 (0x0400 << 16) | (0xc7a0 >> 2),
437 (0x0400 << 16) | (0xc7a4 >> 2),
439 (0x0400 << 16) | (0xc7a8 >> 2),
441 (0x0400 << 16) | (0xc7ac >> 2),
443 (0x0400 << 16) | (0xc7b0 >> 2),
445 (0x0400 << 16) | (0xc7b4 >> 2),
447 (0x0e00 << 16) | (0x9100 >> 2),
449 (0x0e00 << 16) | (0x3c010 >> 2),
451 (0x0e00 << 16) | (0x92a8 >> 2),
453 (0x0e00 << 16) | (0x92ac >> 2),
455 (0x0e00 << 16) | (0x92b4 >> 2),
457 (0x0e00 << 16) | (0x92b8 >> 2),
459 (0x0e00 << 16) | (0x92bc >> 2),
461 (0x0e00 << 16) | (0x92c0 >> 2),
463 (0x0e00 << 16) | (0x92c4 >> 2),
465 (0x0e00 << 16) | (0x92c8 >> 2),
467 (0x0e00 << 16) | (0x92cc >> 2),
469 (0x0e00 << 16) | (0x92d0 >> 2),
471 (0x0e00 << 16) | (0x8c00 >> 2),
473 (0x0e00 << 16) | (0x8c04 >> 2),
475 (0x0e00 << 16) | (0x8c20 >> 2),
477 (0x0e00 << 16) | (0x8c38 >> 2),
479 (0x0e00 << 16) | (0x8c3c >> 2),
481 (0x0e00 << 16) | (0xae00 >> 2),
483 (0x0e00 << 16) | (0x9604 >> 2),
485 (0x0e00 << 16) | (0xac08 >> 2),
487 (0x0e00 << 16) | (0xac0c >> 2),
489 (0x0e00 << 16) | (0xac10 >> 2),
491 (0x0e00 << 16) | (0xac14 >> 2),
493 (0x0e00 << 16) | (0xac58 >> 2),
495 (0x0e00 << 16) | (0xac68 >> 2),
497 (0x0e00 << 16) | (0xac6c >> 2),
499 (0x0e00 << 16) | (0xac70 >> 2),
501 (0x0e00 << 16) | (0xac74 >> 2),
503 (0x0e00 << 16) | (0xac78 >> 2),
505 (0x0e00 << 16) | (0xac7c >> 2),
507 (0x0e00 << 16) | (0xac80 >> 2),
509 (0x0e00 << 16) | (0xac84 >> 2),
511 (0x0e00 << 16) | (0xac88 >> 2),
513 (0x0e00 << 16) | (0xac8c >> 2),
515 (0x0e00 << 16) | (0x970c >> 2),
517 (0x0e00 << 16) | (0x9714 >> 2),
519 (0x0e00 << 16) | (0x9718 >> 2),
521 (0x0e00 << 16) | (0x971c >> 2),
523 (0x0e00 << 16) | (0x31068 >> 2),
525 (0x4e00 << 16) | (0x31068 >> 2),
527 (0x5e00 << 16) | (0x31068 >> 2),
529 (0x6e00 << 16) | (0x31068 >> 2),
531 (0x7e00 << 16) | (0x31068 >> 2),
533 (0x8e00 << 16) | (0x31068 >> 2),
535 (0x9e00 << 16) | (0x31068 >> 2),
537 (0xae00 << 16) | (0x31068 >> 2),
539 (0xbe00 << 16) | (0x31068 >> 2),
541 (0x0e00 << 16) | (0xcd10 >> 2),
543 (0x0e00 << 16) | (0xcd14 >> 2),
545 (0x0e00 << 16) | (0x88b0 >> 2),
547 (0x0e00 << 16) | (0x88b4 >> 2),
549 (0x0e00 << 16) | (0x88b8 >> 2),
551 (0x0e00 << 16) | (0x88bc >> 2),
553 (0x0400 << 16) | (0x89c0 >> 2),
555 (0x0e00 << 16) | (0x88c4 >> 2),
557 (0x0e00 << 16) | (0x88c8 >> 2),
559 (0x0e00 << 16) | (0x88d0 >> 2),
561 (0x0e00 << 16) | (0x88d4 >> 2),
563 (0x0e00 << 16) | (0x88d8 >> 2),
565 (0x0e00 << 16) | (0x8980 >> 2),
567 (0x0e00 << 16) | (0x30938 >> 2),
569 (0x0e00 << 16) | (0x3093c >> 2),
571 (0x0e00 << 16) | (0x30940 >> 2),
573 (0x0e00 << 16) | (0x89a0 >> 2),
575 (0x0e00 << 16) | (0x30900 >> 2),
577 (0x0e00 << 16) | (0x30904 >> 2),
579 (0x0e00 << 16) | (0x89b4 >> 2),
581 (0x0e00 << 16) | (0x3c210 >> 2),
583 (0x0e00 << 16) | (0x3c214 >> 2),
585 (0x0e00 << 16) | (0x3c218 >> 2),
587 (0x0e00 << 16) | (0x8904 >> 2),
590 (0x0e00 << 16) | (0x8c28 >> 2),
591 (0x0e00 << 16) | (0x8c2c >> 2),
592 (0x0e00 << 16) | (0x8c30 >> 2),
593 (0x0e00 << 16) | (0x8c34 >> 2),
594 (0x0e00 << 16) | (0x9600 >> 2),
/*
 * RLC save/restore register list for Kalindi (Kabini).
 * Same apparent encoding as the Spectre list above: selector in the
 * upper 16 bits, dword register offset in the lower 16 bits.
 * NOTE(review): embedded line numbers skip values — entries and braces
 * appear lost in extraction; verify against upstream before use.
 */
597 static const u32 kalindi_rlc_save_restore_register_list[] =
599 (0x0e00 << 16) | (0xc12c >> 2),
601 (0x0e00 << 16) | (0xc140 >> 2),
603 (0x0e00 << 16) | (0xc150 >> 2),
605 (0x0e00 << 16) | (0xc15c >> 2),
607 (0x0e00 << 16) | (0xc168 >> 2),
609 (0x0e00 << 16) | (0xc170 >> 2),
611 (0x0e00 << 16) | (0xc204 >> 2),
613 (0x0e00 << 16) | (0xc2b4 >> 2),
615 (0x0e00 << 16) | (0xc2b8 >> 2),
617 (0x0e00 << 16) | (0xc2bc >> 2),
619 (0x0e00 << 16) | (0xc2c0 >> 2),
621 (0x0e00 << 16) | (0x8228 >> 2),
623 (0x0e00 << 16) | (0x829c >> 2),
625 (0x0e00 << 16) | (0x869c >> 2),
627 (0x0600 << 16) | (0x98f4 >> 2),
629 (0x0e00 << 16) | (0x98f8 >> 2),
631 (0x0e00 << 16) | (0x9900 >> 2),
633 (0x0e00 << 16) | (0xc260 >> 2),
635 (0x0e00 << 16) | (0x90e8 >> 2),
637 (0x0e00 << 16) | (0x3c000 >> 2),
639 (0x0e00 << 16) | (0x3c00c >> 2),
641 (0x0e00 << 16) | (0x8c1c >> 2),
643 (0x0e00 << 16) | (0x9700 >> 2),
645 (0x0e00 << 16) | (0xcd20 >> 2),
647 (0x4e00 << 16) | (0xcd20 >> 2),
649 (0x5e00 << 16) | (0xcd20 >> 2),
651 (0x6e00 << 16) | (0xcd20 >> 2),
653 (0x7e00 << 16) | (0xcd20 >> 2),
655 (0x0e00 << 16) | (0x89bc >> 2),
657 (0x0e00 << 16) | (0x8900 >> 2),
660 (0x0e00 << 16) | (0xc130 >> 2),
662 (0x0e00 << 16) | (0xc134 >> 2),
664 (0x0e00 << 16) | (0xc1fc >> 2),
666 (0x0e00 << 16) | (0xc208 >> 2),
668 (0x0e00 << 16) | (0xc264 >> 2),
670 (0x0e00 << 16) | (0xc268 >> 2),
672 (0x0e00 << 16) | (0xc26c >> 2),
674 (0x0e00 << 16) | (0xc270 >> 2),
676 (0x0e00 << 16) | (0xc274 >> 2),
678 (0x0e00 << 16) | (0xc28c >> 2),
680 (0x0e00 << 16) | (0xc290 >> 2),
682 (0x0e00 << 16) | (0xc294 >> 2),
684 (0x0e00 << 16) | (0xc298 >> 2),
686 (0x0e00 << 16) | (0xc2a0 >> 2),
688 (0x0e00 << 16) | (0xc2a4 >> 2),
690 (0x0e00 << 16) | (0xc2a8 >> 2),
692 (0x0e00 << 16) | (0xc2ac >> 2),
694 (0x0e00 << 16) | (0x301d0 >> 2),
696 (0x0e00 << 16) | (0x30238 >> 2),
698 (0x0e00 << 16) | (0x30250 >> 2),
700 (0x0e00 << 16) | (0x30254 >> 2),
702 (0x0e00 << 16) | (0x30258 >> 2),
704 (0x0e00 << 16) | (0x3025c >> 2),
706 (0x4e00 << 16) | (0xc900 >> 2),
708 (0x5e00 << 16) | (0xc900 >> 2),
710 (0x6e00 << 16) | (0xc900 >> 2),
712 (0x7e00 << 16) | (0xc900 >> 2),
714 (0x4e00 << 16) | (0xc904 >> 2),
716 (0x5e00 << 16) | (0xc904 >> 2),
718 (0x6e00 << 16) | (0xc904 >> 2),
720 (0x7e00 << 16) | (0xc904 >> 2),
722 (0x4e00 << 16) | (0xc908 >> 2),
724 (0x5e00 << 16) | (0xc908 >> 2),
726 (0x6e00 << 16) | (0xc908 >> 2),
728 (0x7e00 << 16) | (0xc908 >> 2),
730 (0x4e00 << 16) | (0xc90c >> 2),
732 (0x5e00 << 16) | (0xc90c >> 2),
734 (0x6e00 << 16) | (0xc90c >> 2),
736 (0x7e00 << 16) | (0xc90c >> 2),
738 (0x4e00 << 16) | (0xc910 >> 2),
740 (0x5e00 << 16) | (0xc910 >> 2),
742 (0x6e00 << 16) | (0xc910 >> 2),
744 (0x7e00 << 16) | (0xc910 >> 2),
746 (0x0e00 << 16) | (0xc99c >> 2),
748 (0x0e00 << 16) | (0x9834 >> 2),
750 (0x0000 << 16) | (0x30f00 >> 2),
752 (0x0000 << 16) | (0x30f04 >> 2),
754 (0x0000 << 16) | (0x30f08 >> 2),
756 (0x0000 << 16) | (0x30f0c >> 2),
758 (0x0600 << 16) | (0x9b7c >> 2),
760 (0x0e00 << 16) | (0x8a14 >> 2),
762 (0x0e00 << 16) | (0x8a18 >> 2),
764 (0x0600 << 16) | (0x30a00 >> 2),
766 (0x0e00 << 16) | (0x8bf0 >> 2),
768 (0x0e00 << 16) | (0x8bcc >> 2),
770 (0x0e00 << 16) | (0x8b24 >> 2),
772 (0x0e00 << 16) | (0x30a04 >> 2),
774 (0x0600 << 16) | (0x30a10 >> 2),
776 (0x0600 << 16) | (0x30a14 >> 2),
778 (0x0600 << 16) | (0x30a18 >> 2),
780 (0x0600 << 16) | (0x30a2c >> 2),
782 (0x0e00 << 16) | (0xc700 >> 2),
784 (0x0e00 << 16) | (0xc704 >> 2),
786 (0x0e00 << 16) | (0xc708 >> 2),
788 (0x0e00 << 16) | (0xc768 >> 2),
790 (0x0400 << 16) | (0xc770 >> 2),
792 (0x0400 << 16) | (0xc774 >> 2),
794 (0x0400 << 16) | (0xc798 >> 2),
796 (0x0400 << 16) | (0xc79c >> 2),
798 (0x0e00 << 16) | (0x9100 >> 2),
800 (0x0e00 << 16) | (0x3c010 >> 2),
802 (0x0e00 << 16) | (0x8c00 >> 2),
804 (0x0e00 << 16) | (0x8c04 >> 2),
806 (0x0e00 << 16) | (0x8c20 >> 2),
808 (0x0e00 << 16) | (0x8c38 >> 2),
810 (0x0e00 << 16) | (0x8c3c >> 2),
812 (0x0e00 << 16) | (0xae00 >> 2),
814 (0x0e00 << 16) | (0x9604 >> 2),
816 (0x0e00 << 16) | (0xac08 >> 2),
818 (0x0e00 << 16) | (0xac0c >> 2),
820 (0x0e00 << 16) | (0xac10 >> 2),
822 (0x0e00 << 16) | (0xac14 >> 2),
824 (0x0e00 << 16) | (0xac58 >> 2),
826 (0x0e00 << 16) | (0xac68 >> 2),
828 (0x0e00 << 16) | (0xac6c >> 2),
830 (0x0e00 << 16) | (0xac70 >> 2),
832 (0x0e00 << 16) | (0xac74 >> 2),
834 (0x0e00 << 16) | (0xac78 >> 2),
836 (0x0e00 << 16) | (0xac7c >> 2),
838 (0x0e00 << 16) | (0xac80 >> 2),
840 (0x0e00 << 16) | (0xac84 >> 2),
842 (0x0e00 << 16) | (0xac88 >> 2),
844 (0x0e00 << 16) | (0xac8c >> 2),
846 (0x0e00 << 16) | (0x970c >> 2),
848 (0x0e00 << 16) | (0x9714 >> 2),
850 (0x0e00 << 16) | (0x9718 >> 2),
852 (0x0e00 << 16) | (0x971c >> 2),
854 (0x0e00 << 16) | (0x31068 >> 2),
856 (0x4e00 << 16) | (0x31068 >> 2),
858 (0x5e00 << 16) | (0x31068 >> 2),
860 (0x6e00 << 16) | (0x31068 >> 2),
862 (0x7e00 << 16) | (0x31068 >> 2),
864 (0x0e00 << 16) | (0xcd10 >> 2),
866 (0x0e00 << 16) | (0xcd14 >> 2),
868 (0x0e00 << 16) | (0x88b0 >> 2),
870 (0x0e00 << 16) | (0x88b4 >> 2),
872 (0x0e00 << 16) | (0x88b8 >> 2),
874 (0x0e00 << 16) | (0x88bc >> 2),
876 (0x0400 << 16) | (0x89c0 >> 2),
878 (0x0e00 << 16) | (0x88c4 >> 2),
880 (0x0e00 << 16) | (0x88c8 >> 2),
882 (0x0e00 << 16) | (0x88d0 >> 2),
884 (0x0e00 << 16) | (0x88d4 >> 2),
886 (0x0e00 << 16) | (0x88d8 >> 2),
888 (0x0e00 << 16) | (0x8980 >> 2),
890 (0x0e00 << 16) | (0x30938 >> 2),
892 (0x0e00 << 16) | (0x3093c >> 2),
894 (0x0e00 << 16) | (0x30940 >> 2),
896 (0x0e00 << 16) | (0x89a0 >> 2),
898 (0x0e00 << 16) | (0x30900 >> 2),
900 (0x0e00 << 16) | (0x30904 >> 2),
902 (0x0e00 << 16) | (0x89b4 >> 2),
904 (0x0e00 << 16) | (0x3e1fc >> 2),
906 (0x0e00 << 16) | (0x3c210 >> 2),
908 (0x0e00 << 16) | (0x3c214 >> 2),
910 (0x0e00 << 16) | (0x3c218 >> 2),
912 (0x0e00 << 16) | (0x8904 >> 2),
915 (0x0e00 << 16) | (0x8c28 >> 2),
916 (0x0e00 << 16) | (0x8c2c >> 2),
917 (0x0e00 << 16) | (0x8c30 >> 2),
918 (0x0e00 << 16) | (0x8c34 >> 2),
919 (0x0e00 << 16) | (0x9600 >> 2),
/* Bonaire SPM golden settings: {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
922 static const u32 bonaire_golden_spm_registers[] =
924 0x30800, 0xe0ffffff, 0xe0000000
/* Bonaire common golden settings: {offset, mask, value} triples. */
927 static const u32 bonaire_golden_common_registers[] =
929 0xc770, 0xffffffff, 0x00000800,
930 0xc774, 0xffffffff, 0x00000800,
931 0xc798, 0xffffffff, 0x00007fbf,
932 0xc79c, 0xffffffff, 0x00007faf
/* Bonaire golden register settings: {offset, mask, value} triples. */
935 static const u32 bonaire_golden_registers[] =
937 0x3354, 0x00000333, 0x00000333,
938 0x3350, 0x000c0fc0, 0x00040200,
939 0x9a10, 0x00010000, 0x00058208,
940 0x3c000, 0xffff1fff, 0x00140000,
941 0x3c200, 0xfdfc0fff, 0x00000100,
942 0x3c234, 0x40000000, 0x40000200,
943 0x9830, 0xffffffff, 0x00000000,
944 0x9834, 0xf00fffff, 0x00000400,
945 0x9838, 0x0002021c, 0x00020200,
946 0xc78, 0x00000080, 0x00000000,
947 0x5bb0, 0x000000f0, 0x00000070,
948 0x5bc0, 0xf0311fff, 0x80300000,
949 0x98f8, 0x73773777, 0x12010001,
950 0x350c, 0x00810000, 0x408af000,
951 0x7030, 0x31000111, 0x00000011,
952 0x2f48, 0x73773777, 0x12010001,
953 0x220c, 0x00007fb6, 0x0021a1b1,
954 0x2210, 0x00007fb6, 0x002021b1,
955 0x2180, 0x00007fb6, 0x00002191,
956 0x2218, 0x00007fb6, 0x002121b1,
957 0x221c, 0x00007fb6, 0x002021b1,
958 0x21dc, 0x00007fb6, 0x00002191,
959 0x21e0, 0x00007fb6, 0x00002191,
960 0x3628, 0x0000003f, 0x0000000a,
961 0x362c, 0x0000003f, 0x0000000a,
962 0x2ae4, 0x00073ffe, 0x000022a2,
963 0x240c, 0x000007ff, 0x00000000,
964 0x8a14, 0xf000003f, 0x00000007,
965 0x8bf0, 0x00002001, 0x00000001,
966 0x8b24, 0xffffffff, 0x00ffffff,
967 0x30a04, 0x0000ff0f, 0x00000000,
968 0x28a4c, 0x07ffffff, 0x06000000,
969 0x4d8, 0x00000fff, 0x00000100,
970 0x3e78, 0x00000001, 0x00000002,
971 0x9100, 0x03000000, 0x0362c688,
972 0x8c00, 0x000000ff, 0x00000001,
973 0xe40, 0x00001fff, 0x00001fff,
974 0x9060, 0x0000007f, 0x00000020,
975 0x9508, 0x00010000, 0x00010000,
976 0xac14, 0x000003ff, 0x000000f3,
977 0xac0c, 0xffffffff, 0x00001032
/* Bonaire medium/coarse-grain clock gating init: {offset, mask, value}
 * triples programmed before enabling CG. */
980 static const u32 bonaire_mgcg_cgcg_init[] =
982 0xc420, 0xffffffff, 0xfffffffc,
983 0x30800, 0xffffffff, 0xe0000000,
984 0x3c2a0, 0xffffffff, 0x00000100,
985 0x3c208, 0xffffffff, 0x00000100,
986 0x3c2c0, 0xffffffff, 0xc0000100,
987 0x3c2c8, 0xffffffff, 0xc0000100,
988 0x3c2c4, 0xffffffff, 0xc0000100,
989 0x55e4, 0xffffffff, 0x00600100,
990 0x3c280, 0xffffffff, 0x00000100,
991 0x3c214, 0xffffffff, 0x06000100,
992 0x3c220, 0xffffffff, 0x00000100,
993 0x3c218, 0xffffffff, 0x06000100,
994 0x3c204, 0xffffffff, 0x00000100,
995 0x3c2e0, 0xffffffff, 0x00000100,
996 0x3c224, 0xffffffff, 0x00000100,
997 0x3c200, 0xffffffff, 0x00000100,
998 0x3c230, 0xffffffff, 0x00000100,
999 0x3c234, 0xffffffff, 0x00000100,
1000 0x3c250, 0xffffffff, 0x00000100,
1001 0x3c254, 0xffffffff, 0x00000100,
1002 0x3c258, 0xffffffff, 0x00000100,
1003 0x3c25c, 0xffffffff, 0x00000100,
1004 0x3c260, 0xffffffff, 0x00000100,
1005 0x3c27c, 0xffffffff, 0x00000100,
1006 0x3c278, 0xffffffff, 0x00000100,
1007 0x3c210, 0xffffffff, 0x06000100,
1008 0x3c290, 0xffffffff, 0x00000100,
1009 0x3c274, 0xffffffff, 0x00000100,
1010 0x3c2b4, 0xffffffff, 0x00000100,
1011 0x3c2b0, 0xffffffff, 0x00000100,
1012 0x3c270, 0xffffffff, 0x00000100,
1013 0x30800, 0xffffffff, 0xe0000000,
1014 0x3c020, 0xffffffff, 0x00010000,
1015 0x3c024, 0xffffffff, 0x00030002,
1016 0x3c028, 0xffffffff, 0x00040007,
1017 0x3c02c, 0xffffffff, 0x00060005,
1018 0x3c030, 0xffffffff, 0x00090008,
1019 0x3c034, 0xffffffff, 0x00010000,
1020 0x3c038, 0xffffffff, 0x00030002,
1021 0x3c03c, 0xffffffff, 0x00040007,
1022 0x3c040, 0xffffffff, 0x00060005,
1023 0x3c044, 0xffffffff, 0x00090008,
1024 0x3c048, 0xffffffff, 0x00010000,
1025 0x3c04c, 0xffffffff, 0x00030002,
1026 0x3c050, 0xffffffff, 0x00040007,
1027 0x3c054, 0xffffffff, 0x00060005,
1028 0x3c058, 0xffffffff, 0x00090008,
1029 0x3c05c, 0xffffffff, 0x00010000,
1030 0x3c060, 0xffffffff, 0x00030002,
1031 0x3c064, 0xffffffff, 0x00040007,
1032 0x3c068, 0xffffffff, 0x00060005,
1033 0x3c06c, 0xffffffff, 0x00090008,
1034 0x3c070, 0xffffffff, 0x00010000,
1035 0x3c074, 0xffffffff, 0x00030002,
1036 0x3c078, 0xffffffff, 0x00040007,
1037 0x3c07c, 0xffffffff, 0x00060005,
1038 0x3c080, 0xffffffff, 0x00090008,
1039 0x3c084, 0xffffffff, 0x00010000,
1040 0x3c088, 0xffffffff, 0x00030002,
1041 0x3c08c, 0xffffffff, 0x00040007,
1042 0x3c090, 0xffffffff, 0x00060005,
1043 0x3c094, 0xffffffff, 0x00090008,
1044 0x3c098, 0xffffffff, 0x00010000,
1045 0x3c09c, 0xffffffff, 0x00030002,
1046 0x3c0a0, 0xffffffff, 0x00040007,
1047 0x3c0a4, 0xffffffff, 0x00060005,
1048 0x3c0a8, 0xffffffff, 0x00090008,
1049 0x3c000, 0xffffffff, 0x96e00200,
1050 0x8708, 0xffffffff, 0x00900100,
1051 0xc424, 0xffffffff, 0x0020003f,
1052 0x38, 0xffffffff, 0x0140001c,
1053 0x3c, 0x000f0000, 0x000f0000,
1054 0x220, 0xffffffff, 0xC060000C,
1055 0x224, 0xc0000fff, 0x00000100,
1056 0xf90, 0xffffffff, 0x00000100,
1057 0xf98, 0x00000101, 0x00000000,
1058 0x20a8, 0xffffffff, 0x00000104,
1059 0x55e4, 0xff000fff, 0x00000100,
1060 0x30cc, 0xc0000fff, 0x00000104,
1061 0xc1e4, 0x00000001, 0x00000001,
1062 0xd00c, 0xff000ff0, 0x00000100,
1063 0xd80c, 0xff000ff0, 0x00000100
/* Spectre (Kaveri) SPM golden settings: {offset, mask, value} triples. */
1066 static const u32 spectre_golden_spm_registers[] =
1068 0x30800, 0xe0ffffff, 0xe0000000
/* Spectre common golden settings: {offset, mask, value} triples. */
1071 static const u32 spectre_golden_common_registers[] =
1073 0xc770, 0xffffffff, 0x00000800,
1074 0xc774, 0xffffffff, 0x00000800,
1075 0xc798, 0xffffffff, 0x00007fbf,
1076 0xc79c, 0xffffffff, 0x00007faf
/* Spectre golden register settings: {offset, mask, value} triples. */
1079 static const u32 spectre_golden_registers[] =
1081 0x3c000, 0xffff1fff, 0x96940200,
1082 0x3c00c, 0xffff0001, 0xff000000,
1083 0x3c200, 0xfffc0fff, 0x00000100,
1084 0x6ed8, 0x00010101, 0x00010000,
1085 0x9834, 0xf00fffff, 0x00000400,
1086 0x9838, 0xfffffffc, 0x00020200,
1087 0x5bb0, 0x000000f0, 0x00000070,
1088 0x5bc0, 0xf0311fff, 0x80300000,
1089 0x98f8, 0x73773777, 0x12010001,
1090 0x9b7c, 0x00ff0000, 0x00fc0000,
1091 0x2f48, 0x73773777, 0x12010001,
1092 0x8a14, 0xf000003f, 0x00000007,
1093 0x8b24, 0xffffffff, 0x00ffffff,
1094 0x28350, 0x3f3f3fff, 0x00000082,
/* NOTE(review): 0x28355 is not dword-aligned — looks like a typo for
 * 0x28354 upstream; verify before relying on this entry. */
1095 0x28355, 0x0000003f, 0x00000000,
1096 0x3e78, 0x00000001, 0x00000002,
1097 0x913c, 0xffff03df, 0x00000004,
1098 0xc768, 0x00000008, 0x00000008,
1099 0x8c00, 0x000008ff, 0x00000800,
1100 0x9508, 0x00010000, 0x00010000,
1101 0xac0c, 0xffffffff, 0x54763210,
1102 0x214f8, 0x01ff01ff, 0x00000002,
1103 0x21498, 0x007ff800, 0x00200000,
1104 0x2015c, 0xffffffff, 0x00000f40,
1105 0x30934, 0xffffffff, 0x00000001
/* Spectre medium/coarse-grain clock gating init: {offset, mask, value}
 * triples programmed before enabling CG. */
1108 static const u32 spectre_mgcg_cgcg_init[] =
1110 0xc420, 0xffffffff, 0xfffffffc,
1111 0x30800, 0xffffffff, 0xe0000000,
1112 0x3c2a0, 0xffffffff, 0x00000100,
1113 0x3c208, 0xffffffff, 0x00000100,
1114 0x3c2c0, 0xffffffff, 0x00000100,
1115 0x3c2c8, 0xffffffff, 0x00000100,
1116 0x3c2c4, 0xffffffff, 0x00000100,
1117 0x55e4, 0xffffffff, 0x00600100,
1118 0x3c280, 0xffffffff, 0x00000100,
1119 0x3c214, 0xffffffff, 0x06000100,
1120 0x3c220, 0xffffffff, 0x00000100,
1121 0x3c218, 0xffffffff, 0x06000100,
1122 0x3c204, 0xffffffff, 0x00000100,
1123 0x3c2e0, 0xffffffff, 0x00000100,
1124 0x3c224, 0xffffffff, 0x00000100,
1125 0x3c200, 0xffffffff, 0x00000100,
1126 0x3c230, 0xffffffff, 0x00000100,
1127 0x3c234, 0xffffffff, 0x00000100,
1128 0x3c250, 0xffffffff, 0x00000100,
1129 0x3c254, 0xffffffff, 0x00000100,
1130 0x3c258, 0xffffffff, 0x00000100,
1131 0x3c25c, 0xffffffff, 0x00000100,
1132 0x3c260, 0xffffffff, 0x00000100,
1133 0x3c27c, 0xffffffff, 0x00000100,
1134 0x3c278, 0xffffffff, 0x00000100,
1135 0x3c210, 0xffffffff, 0x06000100,
1136 0x3c290, 0xffffffff, 0x00000100,
1137 0x3c274, 0xffffffff, 0x00000100,
1138 0x3c2b4, 0xffffffff, 0x00000100,
1139 0x3c2b0, 0xffffffff, 0x00000100,
1140 0x3c270, 0xffffffff, 0x00000100,
1141 0x30800, 0xffffffff, 0xe0000000,
1142 0x3c020, 0xffffffff, 0x00010000,
1143 0x3c024, 0xffffffff, 0x00030002,
1144 0x3c028, 0xffffffff, 0x00040007,
1145 0x3c02c, 0xffffffff, 0x00060005,
1146 0x3c030, 0xffffffff, 0x00090008,
1147 0x3c034, 0xffffffff, 0x00010000,
1148 0x3c038, 0xffffffff, 0x00030002,
1149 0x3c03c, 0xffffffff, 0x00040007,
1150 0x3c040, 0xffffffff, 0x00060005,
1151 0x3c044, 0xffffffff, 0x00090008,
1152 0x3c048, 0xffffffff, 0x00010000,
1153 0x3c04c, 0xffffffff, 0x00030002,
1154 0x3c050, 0xffffffff, 0x00040007,
1155 0x3c054, 0xffffffff, 0x00060005,
1156 0x3c058, 0xffffffff, 0x00090008,
1157 0x3c05c, 0xffffffff, 0x00010000,
1158 0x3c060, 0xffffffff, 0x00030002,
1159 0x3c064, 0xffffffff, 0x00040007,
1160 0x3c068, 0xffffffff, 0x00060005,
1161 0x3c06c, 0xffffffff, 0x00090008,
1162 0x3c070, 0xffffffff, 0x00010000,
1163 0x3c074, 0xffffffff, 0x00030002,
1164 0x3c078, 0xffffffff, 0x00040007,
1165 0x3c07c, 0xffffffff, 0x00060005,
1166 0x3c080, 0xffffffff, 0x00090008,
1167 0x3c084, 0xffffffff, 0x00010000,
1168 0x3c088, 0xffffffff, 0x00030002,
1169 0x3c08c, 0xffffffff, 0x00040007,
1170 0x3c090, 0xffffffff, 0x00060005,
1171 0x3c094, 0xffffffff, 0x00090008,
1172 0x3c098, 0xffffffff, 0x00010000,
1173 0x3c09c, 0xffffffff, 0x00030002,
1174 0x3c0a0, 0xffffffff, 0x00040007,
1175 0x3c0a4, 0xffffffff, 0x00060005,
1176 0x3c0a8, 0xffffffff, 0x00090008,
1177 0x3c0ac, 0xffffffff, 0x00010000,
1178 0x3c0b0, 0xffffffff, 0x00030002,
1179 0x3c0b4, 0xffffffff, 0x00040007,
1180 0x3c0b8, 0xffffffff, 0x00060005,
1181 0x3c0bc, 0xffffffff, 0x00090008,
1182 0x3c000, 0xffffffff, 0x96e00200,
1183 0x8708, 0xffffffff, 0x00900100,
1184 0xc424, 0xffffffff, 0x0020003f,
1185 0x38, 0xffffffff, 0x0140001c,
1186 0x3c, 0x000f0000, 0x000f0000,
1187 0x220, 0xffffffff, 0xC060000C,
1188 0x224, 0xc0000fff, 0x00000100,
1189 0xf90, 0xffffffff, 0x00000100,
1190 0xf98, 0x00000101, 0x00000000,
1191 0x20a8, 0xffffffff, 0x00000104,
1192 0x55e4, 0xff000fff, 0x00000100,
1193 0x30cc, 0xc0000fff, 0x00000104,
1194 0xc1e4, 0x00000001, 0x00000001,
1195 0xd00c, 0xff000ff0, 0x00000100,
1196 0xd80c, 0xff000ff0, 0x00000100
/* Kalindi (Kabini) SPM golden settings: {offset, mask, value} triples. */
1199 static const u32 kalindi_golden_spm_registers[] =
1201 0x30800, 0xe0ffffff, 0xe0000000
/* Kalindi common golden settings: {offset, mask, value} triples. */
1204 static const u32 kalindi_golden_common_registers[] =
1206 0xc770, 0xffffffff, 0x00000800,
1207 0xc774, 0xffffffff, 0x00000800,
1208 0xc798, 0xffffffff, 0x00007fbf,
1209 0xc79c, 0xffffffff, 0x00007faf
/* Kalindi golden register settings: {offset, mask, value} triples. */
1212 static const u32 kalindi_golden_registers[] =
1214 0x3c000, 0xffffdfff, 0x6e944040,
1215 0x55e4, 0xff607fff, 0xfc000100,
1216 0x3c220, 0xff000fff, 0x00000100,
1217 0x3c224, 0xff000fff, 0x00000100,
1218 0x3c200, 0xfffc0fff, 0x00000100,
1219 0x6ed8, 0x00010101, 0x00010000,
1220 0x9830, 0xffffffff, 0x00000000,
1221 0x9834, 0xf00fffff, 0x00000400,
1222 0x5bb0, 0x000000f0, 0x00000070,
1223 0x5bc0, 0xf0311fff, 0x80300000,
1224 0x98f8, 0x73773777, 0x12010001,
1225 0x98fc, 0xffffffff, 0x00000010,
1226 0x9b7c, 0x00ff0000, 0x00fc0000,
1227 0x8030, 0x00001f0f, 0x0000100a,
1228 0x2f48, 0x73773777, 0x12010001,
1229 0x2408, 0x000fffff, 0x000c007f,
1230 0x8a14, 0xf000003f, 0x00000007,
1231 0x8b24, 0x3fff3fff, 0x00ffcfff,
1232 0x30a04, 0x0000ff0f, 0x00000000,
1233 0x28a4c, 0x07ffffff, 0x06000000,
1234 0x4d8, 0x00000fff, 0x00000100,
1235 0x3e78, 0x00000001, 0x00000002,
1236 0xc768, 0x00000008, 0x00000008,
1237 0x8c00, 0x000000ff, 0x00000003,
1238 0x214f8, 0x01ff01ff, 0x00000002,
1239 0x21498, 0x007ff800, 0x00200000,
1240 0x2015c, 0xffffffff, 0x00000f40,
1241 0x88c4, 0x001f3ae3, 0x00000082,
1242 0x88d4, 0x0000001f, 0x00000010,
1243 0x30934, 0xffffffff, 0x00000000
/* Kalindi medium/coarse-grain clock gating init: {offset, mask, value}
 * triples programmed before enabling CG. */
1246 static const u32 kalindi_mgcg_cgcg_init[] =
1248 0xc420, 0xffffffff, 0xfffffffc,
1249 0x30800, 0xffffffff, 0xe0000000,
1250 0x3c2a0, 0xffffffff, 0x00000100,
1251 0x3c208, 0xffffffff, 0x00000100,
1252 0x3c2c0, 0xffffffff, 0x00000100,
1253 0x3c2c8, 0xffffffff, 0x00000100,
1254 0x3c2c4, 0xffffffff, 0x00000100,
1255 0x55e4, 0xffffffff, 0x00600100,
1256 0x3c280, 0xffffffff, 0x00000100,
1257 0x3c214, 0xffffffff, 0x06000100,
1258 0x3c220, 0xffffffff, 0x00000100,
1259 0x3c218, 0xffffffff, 0x06000100,
1260 0x3c204, 0xffffffff, 0x00000100,
1261 0x3c2e0, 0xffffffff, 0x00000100,
1262 0x3c224, 0xffffffff, 0x00000100,
1263 0x3c200, 0xffffffff, 0x00000100,
1264 0x3c230, 0xffffffff, 0x00000100,
1265 0x3c234, 0xffffffff, 0x00000100,
1266 0x3c250, 0xffffffff, 0x00000100,
1267 0x3c254, 0xffffffff, 0x00000100,
1268 0x3c258, 0xffffffff, 0x00000100,
1269 0x3c25c, 0xffffffff, 0x00000100,
1270 0x3c260, 0xffffffff, 0x00000100,
1271 0x3c27c, 0xffffffff, 0x00000100,
1272 0x3c278, 0xffffffff, 0x00000100,
1273 0x3c210, 0xffffffff, 0x06000100,
1274 0x3c290, 0xffffffff, 0x00000100,
1275 0x3c274, 0xffffffff, 0x00000100,
1276 0x3c2b4, 0xffffffff, 0x00000100,
1277 0x3c2b0, 0xffffffff, 0x00000100,
1278 0x3c270, 0xffffffff, 0x00000100,
1279 0x30800, 0xffffffff, 0xe0000000,
1280 0x3c020, 0xffffffff, 0x00010000,
1281 0x3c024, 0xffffffff, 0x00030002,
1282 0x3c028, 0xffffffff, 0x00040007,
1283 0x3c02c, 0xffffffff, 0x00060005,
1284 0x3c030, 0xffffffff, 0x00090008,
1285 0x3c034, 0xffffffff, 0x00010000,
1286 0x3c038, 0xffffffff, 0x00030002,
1287 0x3c03c, 0xffffffff, 0x00040007,
1288 0x3c040, 0xffffffff, 0x00060005,
1289 0x3c044, 0xffffffff, 0x00090008,
1290 0x3c000, 0xffffffff, 0x96e00200,
1291 0x8708, 0xffffffff, 0x00900100,
1292 0xc424, 0xffffffff, 0x0020003f,
1293 0x38, 0xffffffff, 0x0140001c,
1294 0x3c, 0x000f0000, 0x000f0000,
1295 0x220, 0xffffffff, 0xC060000C,
1296 0x224, 0xc0000fff, 0x00000100,
1297 0x20a8, 0xffffffff, 0x00000104,
1298 0x55e4, 0xff000fff, 0x00000100,
1299 0x30cc, 0xc0000fff, 0x00000104,
1300 0xc1e4, 0x00000001, 0x00000001,
1301 0xd00c, 0xff000ff0, 0x00000100,
1302 0xd80c, 0xff000ff0, 0x00000100
1305 static void cik_init_golden_registers(struct radeon_device *rdev)
1307 switch (rdev->family) {
1309 radeon_program_register_sequence(rdev,
1310 bonaire_mgcg_cgcg_init,
1311 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1312 radeon_program_register_sequence(rdev,
1313 bonaire_golden_registers,
1314 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1315 radeon_program_register_sequence(rdev,
1316 bonaire_golden_common_registers,
1317 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1318 radeon_program_register_sequence(rdev,
1319 bonaire_golden_spm_registers,
1320 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1323 radeon_program_register_sequence(rdev,
1324 kalindi_mgcg_cgcg_init,
1325 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1326 radeon_program_register_sequence(rdev,
1327 kalindi_golden_registers,
1328 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1329 radeon_program_register_sequence(rdev,
1330 kalindi_golden_common_registers,
1331 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1332 radeon_program_register_sequence(rdev,
1333 kalindi_golden_spm_registers,
1334 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1337 radeon_program_register_sequence(rdev,
1338 spectre_mgcg_cgcg_init,
1339 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1340 radeon_program_register_sequence(rdev,
1341 spectre_golden_registers,
1342 (const u32)ARRAY_SIZE(spectre_golden_registers));
1343 radeon_program_register_sequence(rdev,
1344 spectre_golden_common_registers,
1345 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1346 radeon_program_register_sequence(rdev,
1347 spectre_golden_spm_registers,
1348 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1356 * cik_get_xclk - get the xclk
1358 * @rdev: radeon_device pointer
1360 * Returns the reference clock used by the gfx engine
1363 u32 cik_get_xclk(struct radeon_device *rdev)
1365 u32 reference_clock = rdev->clock.spll.reference_freq;
1367 if (rdev->flags & RADEON_IS_IGP) {
1368 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1369 return reference_clock / 2;
1371 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1372 return reference_clock / 4;
1374 return reference_clock;
1378 * cik_mm_rdoorbell - read a doorbell dword
1380 * @rdev: radeon_device pointer
1381 * @offset: byte offset into the aperture
1383 * Returns the value in the doorbell aperture at the
1384 * requested offset (CIK).
1386 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1388 if (offset < rdev->doorbell.size) {
1389 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1391 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1397 * cik_mm_wdoorbell - write a doorbell dword
1399 * @rdev: radeon_device pointer
1400 * @offset: byte offset into the aperture
1401 * @v: value to write
1403 * Writes @v to the doorbell aperture at the
1404 * requested offset (CIK).
1406 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1408 if (offset < rdev->doorbell.size) {
1409 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1411 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1415 #define BONAIRE_IO_MC_REGS_SIZE 36
1417 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1419 {0x00000070, 0x04400000},
1420 {0x00000071, 0x80c01803},
1421 {0x00000072, 0x00004004},
1422 {0x00000073, 0x00000100},
1423 {0x00000074, 0x00ff0000},
1424 {0x00000075, 0x34000000},
1425 {0x00000076, 0x08000014},
1426 {0x00000077, 0x00cc08ec},
1427 {0x00000078, 0x00000400},
1428 {0x00000079, 0x00000000},
1429 {0x0000007a, 0x04090000},
1430 {0x0000007c, 0x00000000},
1431 {0x0000007e, 0x4408a8e8},
1432 {0x0000007f, 0x00000304},
1433 {0x00000080, 0x00000000},
1434 {0x00000082, 0x00000001},
1435 {0x00000083, 0x00000002},
1436 {0x00000084, 0xf3e4f400},
1437 {0x00000085, 0x052024e3},
1438 {0x00000087, 0x00000000},
1439 {0x00000088, 0x01000000},
1440 {0x0000008a, 0x1c0a0000},
1441 {0x0000008b, 0xff010000},
1442 {0x0000008d, 0xffffefff},
1443 {0x0000008e, 0xfff3efff},
1444 {0x0000008f, 0xfff3efbf},
1445 {0x00000092, 0xf7ffffff},
1446 {0x00000093, 0xffffff7f},
1447 {0x00000095, 0x00101101},
1448 {0x00000096, 0x00000fff},
1449 {0x00000097, 0x00116fff},
1450 {0x00000098, 0x60010000},
1451 {0x00000099, 0x10010000},
1452 {0x0000009a, 0x00006000},
1453 {0x0000009b, 0x00001000},
1454 {0x0000009f, 0x00b48000}
1458 * cik_srbm_select - select specific register instances
1460 * @rdev: radeon_device pointer
1461 * @me: selected ME (micro engine)
1466 * Switches the currently active registers instances. Some
1467 * registers are instanced per VMID, others are instanced per
1468 * me/pipe/queue combination.
1470 static void cik_srbm_select(struct radeon_device *rdev,
1471 u32 me, u32 pipe, u32 queue, u32 vmid)
1473 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1476 QUEUEID(queue & 0x7));
1477 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1482 * ci_mc_load_microcode - load MC ucode into the hw
1484 * @rdev: radeon_device pointer
1486 * Load the GDDR MC ucode into the hw (CIK).
1487 * Returns 0 on success, error on failure.
1489 static int ci_mc_load_microcode(struct radeon_device *rdev)
1491 const __be32 *fw_data;
1492 u32 running, blackout = 0;
1494 int i, ucode_size, regs_size;
1499 switch (rdev->family) {
1502 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1503 ucode_size = CIK_MC_UCODE_SIZE;
1504 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1508 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1512 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1513 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1516 /* reset the engine and set to writable */
1517 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1520 /* load mc io regs */
1521 for (i = 0; i < regs_size; i++) {
1522 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1523 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1525 /* load the MC ucode */
1526 fw_data = (const __be32 *)rdev->mc_fw->data;
1527 for (i = 0; i < ucode_size; i++)
1528 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1530 /* put the engine back into the active state */
1531 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1532 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1533 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1535 /* wait for training to complete */
1536 for (i = 0; i < rdev->usec_timeout; i++) {
1537 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1541 for (i = 0; i < rdev->usec_timeout; i++) {
1542 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1548 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1555 * cik_init_microcode - load ucode images from disk
1557 * @rdev: radeon_device pointer
1559 * Use the firmware interface to load the ucode images into
1560 * the driver (not loaded into hw).
1561 * Returns 0 on success, error on failure.
1563 static int cik_init_microcode(struct radeon_device *rdev)
1565 const char *chip_name;
1566 size_t pfp_req_size, me_req_size, ce_req_size,
1567 mec_req_size, rlc_req_size, mc_req_size,
1568 sdma_req_size, smc_req_size;
1574 switch (rdev->family) {
1576 chip_name = "BONAIRE";
1577 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1582 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1583 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1584 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1587 chip_name = "KAVERI";
1588 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1589 me_req_size = CIK_ME_UCODE_SIZE * 4;
1590 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1591 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1592 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1593 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1596 chip_name = "KABINI";
1597 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1598 me_req_size = CIK_ME_UCODE_SIZE * 4;
1599 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1600 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1601 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1602 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1607 DRM_INFO("Loading %s Microcode\n", chip_name);
1609 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1610 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1613 if (rdev->pfp_fw->size != pfp_req_size) {
1615 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1616 rdev->pfp_fw->size, fw_name);
1621 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1622 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1625 if (rdev->me_fw->size != me_req_size) {
1627 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 rdev->me_fw->size, fw_name);
1632 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1633 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1636 if (rdev->ce_fw->size != ce_req_size) {
1638 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 rdev->ce_fw->size, fw_name);
1643 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1644 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1647 if (rdev->mec_fw->size != mec_req_size) {
1649 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1650 rdev->mec_fw->size, fw_name);
1654 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1655 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1658 if (rdev->rlc_fw->size != rlc_req_size) {
1660 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1661 rdev->rlc_fw->size, fw_name);
1665 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1666 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1669 if (rdev->sdma_fw->size != sdma_req_size) {
1671 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1672 rdev->sdma_fw->size, fw_name);
1676 /* No SMC, MC ucode on APUs */
1677 if (!(rdev->flags & RADEON_IS_IGP)) {
1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1679 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1682 if (rdev->mc_fw->size != mc_req_size) {
1684 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1685 rdev->mc_fw->size, fw_name);
1689 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1690 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1693 "smc: error loading firmware \"%s\"\n",
1695 release_firmware(rdev->smc_fw);
1696 rdev->smc_fw = NULL;
1697 } else if (rdev->smc_fw->size != smc_req_size) {
1699 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1700 rdev->smc_fw->size, fw_name);
1709 "cik_cp: Failed to load firmware \"%s\"\n",
1711 release_firmware(rdev->pfp_fw);
1712 rdev->pfp_fw = NULL;
1713 release_firmware(rdev->me_fw);
1715 release_firmware(rdev->ce_fw);
1717 release_firmware(rdev->rlc_fw);
1718 rdev->rlc_fw = NULL;
1719 release_firmware(rdev->mc_fw);
1721 release_firmware(rdev->smc_fw);
1722 rdev->smc_fw = NULL;
1731 * cik_tiling_mode_table_init - init the hw tiling table
1733 * @rdev: radeon_device pointer
1735 * Starting with SI, the tiling setup is done globally in a
1736 * set of 32 tiling modes. Rather than selecting each set of
1737 * parameters per surface as on older asics, we just select
1738 * which index in the tiling table we want to use, and the
1739 * surface uses those parameters (CIK).
1741 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1743 const u32 num_tile_mode_states = 32;
1744 const u32 num_secondary_tile_mode_states = 16;
1745 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1746 u32 num_pipe_configs;
1747 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1748 rdev->config.cik.max_shader_engines;
1750 switch (rdev->config.cik.mem_row_size_in_kb) {
1752 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1756 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1759 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1763 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1764 if (num_pipe_configs > 8)
1765 num_pipe_configs = 8; /* ??? */
1767 if (num_pipe_configs == 8) {
1768 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1769 switch (reg_offset) {
1771 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1773 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1774 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1777 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1779 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1780 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1783 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1785 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1789 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1791 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1795 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1796 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1797 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1798 TILE_SPLIT(split_equal_to_row_size));
1801 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1805 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1807 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1811 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1813 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1814 TILE_SPLIT(split_equal_to_row_size));
1817 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1818 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1821 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1825 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1826 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1827 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1831 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1832 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1833 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1837 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1838 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1843 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1847 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1849 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1853 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1855 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1859 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1860 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1861 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1865 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1869 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1871 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1875 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1877 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1881 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1882 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1883 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1890 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1891 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1893 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1894 switch (reg_offset) {
1896 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1899 NUM_BANKS(ADDR_SURF_16_BANK));
1902 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1903 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1904 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1905 NUM_BANKS(ADDR_SURF_16_BANK));
1908 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1911 NUM_BANKS(ADDR_SURF_16_BANK));
1914 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1917 NUM_BANKS(ADDR_SURF_16_BANK));
1920 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1923 NUM_BANKS(ADDR_SURF_8_BANK));
1926 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1929 NUM_BANKS(ADDR_SURF_4_BANK));
1932 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1935 NUM_BANKS(ADDR_SURF_2_BANK));
1938 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1941 NUM_BANKS(ADDR_SURF_16_BANK));
1944 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1947 NUM_BANKS(ADDR_SURF_16_BANK));
1950 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1953 NUM_BANKS(ADDR_SURF_16_BANK));
1956 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959 NUM_BANKS(ADDR_SURF_16_BANK));
1962 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1965 NUM_BANKS(ADDR_SURF_8_BANK));
1968 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971 NUM_BANKS(ADDR_SURF_4_BANK));
1974 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1977 NUM_BANKS(ADDR_SURF_2_BANK));
1983 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1985 } else if (num_pipe_configs == 4) {
1987 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1988 switch (reg_offset) {
1990 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1991 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1992 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1993 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1996 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1998 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2002 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2003 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2004 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2008 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2009 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2010 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2014 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2015 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2016 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2017 TILE_SPLIT(split_equal_to_row_size));
2020 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2021 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2024 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2030 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2032 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2033 TILE_SPLIT(split_equal_to_row_size));
2036 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2037 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2040 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2044 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2050 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2052 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2057 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2058 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2062 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2066 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2073 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2074 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2080 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2084 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2088 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2090 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2096 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2100 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2109 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2110 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2112 } else if (num_rbs < 4) {
2113 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2114 switch (reg_offset) {
2116 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2118 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2119 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2122 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2124 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2128 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2130 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2131 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2134 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2136 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2137 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2140 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2142 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143 TILE_SPLIT(split_equal_to_row_size));
2146 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2152 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2156 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2158 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2159 TILE_SPLIT(split_equal_to_row_size));
2162 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2166 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2172 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2178 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2192 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2193 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2210 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2214 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2216 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2220 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2222 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2226 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2235 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2236 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2240 switch (reg_offset) {
2242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245 NUM_BANKS(ADDR_SURF_16_BANK));
2248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2251 NUM_BANKS(ADDR_SURF_16_BANK));
2254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 NUM_BANKS(ADDR_SURF_16_BANK));
2260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 NUM_BANKS(ADDR_SURF_16_BANK));
2266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 NUM_BANKS(ADDR_SURF_16_BANK));
2272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 NUM_BANKS(ADDR_SURF_8_BANK));
2278 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281 NUM_BANKS(ADDR_SURF_4_BANK));
2284 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 NUM_BANKS(ADDR_SURF_16_BANK));
2290 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 NUM_BANKS(ADDR_SURF_16_BANK));
2296 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299 NUM_BANKS(ADDR_SURF_16_BANK));
2302 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305 NUM_BANKS(ADDR_SURF_16_BANK));
2308 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311 NUM_BANKS(ADDR_SURF_16_BANK));
2314 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2317 NUM_BANKS(ADDR_SURF_8_BANK));
2320 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2323 NUM_BANKS(ADDR_SURF_4_BANK));
2329 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2331 } else if (num_pipe_configs == 2) {
2332 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2333 switch (reg_offset) {
2335 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2341 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2347 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2349 PIPE_CONFIG(ADDR_SURF_P2) |
2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2353 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2355 PIPE_CONFIG(ADDR_SURF_P2) |
2356 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2359 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2361 PIPE_CONFIG(ADDR_SURF_P2) |
2362 TILE_SPLIT(split_equal_to_row_size));
2365 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P2) |
2372 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2377 PIPE_CONFIG(ADDR_SURF_P2) |
2378 TILE_SPLIT(split_equal_to_row_size));
2381 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2388 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2390 PIPE_CONFIG(ADDR_SURF_P2) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2396 PIPE_CONFIG(ADDR_SURF_P2) |
2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2402 PIPE_CONFIG(ADDR_SURF_P2) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2410 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2412 PIPE_CONFIG(ADDR_SURF_P2) |
2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 PIPE_CONFIG(ADDR_SURF_P2) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2424 PIPE_CONFIG(ADDR_SURF_P2) |
2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 PIPE_CONFIG(ADDR_SURF_P2) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440 PIPE_CONFIG(ADDR_SURF_P2) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 PIPE_CONFIG(ADDR_SURF_P2) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2454 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2456 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2457 switch (reg_offset) {
2459 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2462 NUM_BANKS(ADDR_SURF_16_BANK));
2465 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2468 NUM_BANKS(ADDR_SURF_16_BANK));
2471 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474 NUM_BANKS(ADDR_SURF_16_BANK));
2477 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2480 NUM_BANKS(ADDR_SURF_16_BANK));
2483 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2486 NUM_BANKS(ADDR_SURF_16_BANK));
2489 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2492 NUM_BANKS(ADDR_SURF_16_BANK));
2495 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498 NUM_BANKS(ADDR_SURF_8_BANK));
2501 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2504 NUM_BANKS(ADDR_SURF_16_BANK));
2507 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 NUM_BANKS(ADDR_SURF_16_BANK));
2513 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2516 NUM_BANKS(ADDR_SURF_16_BANK));
2519 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2522 NUM_BANKS(ADDR_SURF_16_BANK));
2525 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2528 NUM_BANKS(ADDR_SURF_16_BANK));
2531 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534 NUM_BANKS(ADDR_SURF_16_BANK));
2537 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2546 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2549 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2553 * cik_select_se_sh - select which SE, SH to address
2555 * @rdev: radeon_device pointer
2556 * @se_num: shader engine to address
2557 * @sh_num: sh block to address
2559 * Select which SE, SH combinations to address. Certain
2560 * registers are instanced per SE or SH. 0xffffffff means
2561 * broadcast to all SEs or SHs (CIK).
2563 static void cik_select_se_sh(struct radeon_device *rdev,
2564 u32 se_num, u32 sh_num)
2566 u32 data = INSTANCE_BROADCAST_WRITES;
2568 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2569 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2570 else if (se_num == 0xffffffff)
2571 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2572 else if (sh_num == 0xffffffff)
2573 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2575 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2576 WREG32(GRBM_GFX_INDEX, data);
2580 * cik_create_bitmask - create a bitmask
2582 * @bit_width: length of the mask
2584 * create a variable length bit mask (CIK).
2585 * Returns the bitmask.
2587 static u32 cik_create_bitmask(u32 bit_width)
2591 for (i = 0; i < bit_width; i++) {
2599 * cik_select_se_sh - select which SE, SH to address
2601 * @rdev: radeon_device pointer
2602 * @max_rb_num: max RBs (render backends) for the asic
2603 * @se_num: number of SEs (shader engines) for the asic
2604 * @sh_per_se: number of SH blocks per SE for the asic
2606 * Calculates the bitmask of disabled RBs (CIK).
2607 * Returns the disabled RB bitmask.
2609 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2610 u32 max_rb_num, u32 se_num,
2615 data = RREG32(CC_RB_BACKEND_DISABLE);
2617 data &= BACKEND_DISABLE_MASK;
2620 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2622 data >>= BACKEND_DISABLE_SHIFT;
2624 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2630 * cik_setup_rb - setup the RBs on the asic
2632 * @rdev: radeon_device pointer
2633 * @se_num: number of SEs (shader engines) for the asic
2634 * @sh_per_se: number of SH blocks per SE for the asic
2635 * @max_rb_num: max RBs (render backends) for the asic
2637 * Configures per-SE/SH RB registers (CIK).
2639 static void cik_setup_rb(struct radeon_device *rdev,
2640 u32 se_num, u32 sh_per_se,
/* NOTE(review): this text looks truncated - the max_rb_num parameter line,
 * the i/j/data/mask declarations, and several braces are not visible.
 * Verify against upstream radeon cik.c before relying on it. */
2645 u32 disabled_rbs = 0;
2646 u32 enabled_rbs = 0;
/* pass 1: gather each SH's disabled-RB bits into one global bitmap */
2648 for (i = 0; i < se_num; i++) {
2649 for (j = 0; j < sh_per_se; j++) {
2650 cik_select_se_sh(rdev, i, j);
2651 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2652 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2655 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* pass 2: invert the disabled bitmap into an enabled bitmap
 * (the mask initialisation/shift lines are not visible here) */
2658 for (i = 0; i < max_rb_num; i++) {
2659 if (!(disabled_rbs & mask))
2660 enabled_rbs |= mask;
/* pass 3: program PA_SC_RASTER_CONFIG per SE from the enabled-RB pattern;
 * the switch case labels were lost in extraction - presumably two bits of
 * enabled_rbs select among RB_MAP_0/3/2 - TODO confirm against upstream */
2664 for (i = 0; i < se_num; i++) {
2665 cik_select_se_sh(rdev, i, 0xffffffff);
2667 for (j = 0; j < sh_per_se; j++) {
2668 switch (enabled_rbs & 3) {
2670 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2673 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2677 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2682 WREG32(PA_SC_RASTER_CONFIG, data);
/* restore broadcast addressing when done */
2684 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2688 * cik_gpu_init - setup the 3D engine
2690 * @rdev: radeon_device pointer
2692 * Configures the 3D engine and tiling configuration
2693 * registers so that the 3D engine is usable.
2695 static void cik_gpu_init(struct radeon_device *rdev)
2697 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2698 u32 mc_shared_chmap, mc_arb_ramcfg;
2699 u32 hdp_host_path_cntl;
/* Per-family asic limits.  The case labels were lost in extraction -
 * presumably CHIP_BONAIRE, CHIP_KAVERI and CHIP_KABINI - TODO confirm. */
2703 switch (rdev->family) {
2705 rdev->config.cik.max_shader_engines = 2;
2706 rdev->config.cik.max_tile_pipes = 4;
2707 rdev->config.cik.max_cu_per_sh = 7;
2708 rdev->config.cik.max_sh_per_se = 1;
2709 rdev->config.cik.max_backends_per_se = 2;
2710 rdev->config.cik.max_texture_channel_caches = 4;
2711 rdev->config.cik.max_gprs = 256;
2712 rdev->config.cik.max_gs_threads = 32;
2713 rdev->config.cik.max_hw_contexts = 8;
2715 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2716 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2717 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2718 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2719 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2722 rdev->config.cik.max_shader_engines = 1;
2723 rdev->config.cik.max_tile_pipes = 4;
/* Kaveri SKUs differ in CU count and RB count per PCI device id */
2724 if ((rdev->pdev->device == 0x1304) ||
2725 (rdev->pdev->device == 0x1305) ||
2726 (rdev->pdev->device == 0x130C) ||
2727 (rdev->pdev->device == 0x130F) ||
2728 (rdev->pdev->device == 0x1310) ||
2729 (rdev->pdev->device == 0x1311) ||
2730 (rdev->pdev->device == 0x131C)) {
2731 rdev->config.cik.max_cu_per_sh = 8;
2732 rdev->config.cik.max_backends_per_se = 2;
2733 } else if ((rdev->pdev->device == 0x1309) ||
2734 (rdev->pdev->device == 0x130A) ||
2735 (rdev->pdev->device == 0x130D) ||
2736 (rdev->pdev->device == 0x1313) ||
2737 (rdev->pdev->device == 0x131D)) {
2738 rdev->config.cik.max_cu_per_sh = 6;
2739 rdev->config.cik.max_backends_per_se = 2;
2740 } else if ((rdev->pdev->device == 0x1306) ||
2741 (rdev->pdev->device == 0x1307) ||
2742 (rdev->pdev->device == 0x130B) ||
2743 (rdev->pdev->device == 0x130E) ||
2744 (rdev->pdev->device == 0x1315) ||
2745 (rdev->pdev->device == 0x131B)) {
2746 rdev->config.cik.max_cu_per_sh = 4;
2747 rdev->config.cik.max_backends_per_se = 1;
2749 rdev->config.cik.max_cu_per_sh = 3;
2750 rdev->config.cik.max_backends_per_se = 1;
2752 rdev->config.cik.max_sh_per_se = 1;
2753 rdev->config.cik.max_texture_channel_caches = 4;
2754 rdev->config.cik.max_gprs = 256;
2755 rdev->config.cik.max_gs_threads = 16;
2756 rdev->config.cik.max_hw_contexts = 8;
2758 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2759 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2760 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2761 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2762 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2766 rdev->config.cik.max_shader_engines = 1;
2767 rdev->config.cik.max_tile_pipes = 2;
2768 rdev->config.cik.max_cu_per_sh = 2;
2769 rdev->config.cik.max_sh_per_se = 1;
2770 rdev->config.cik.max_backends_per_se = 1;
2771 rdev->config.cik.max_texture_channel_caches = 2;
2772 rdev->config.cik.max_gprs = 256;
2773 rdev->config.cik.max_gs_threads = 16;
2774 rdev->config.cik.max_hw_contexts = 8;
2776 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2777 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2778 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2779 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2780 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2784 /* Initialize HDP */
2785 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2786 WREG32((0x2c14 + j), 0x00000000);
2787 WREG32((0x2c18 + j), 0x00000000);
2788 WREG32((0x2c1c + j), 0x00000000);
2789 WREG32((0x2c20 + j), 0x00000000);
2790 WREG32((0x2c24 + j), 0x00000000);
2793 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2795 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
/* read the memory-controller configuration to size rows/banks below */
2797 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2798 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2800 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2801 rdev->config.cik.mem_max_burst_length_bytes = 256;
2802 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2803 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2804 if (rdev->config.cik.mem_row_size_in_kb > 4)
2805 rdev->config.cik.mem_row_size_in_kb = 4;
2806 /* XXX use MC settings? */
2807 rdev->config.cik.shader_engine_tile_size = 32;
2808 rdev->config.cik.num_gpus = 1;
2809 rdev->config.cik.multi_gpu_tile_size = 64;
2811 /* fix up row size */
2812 gb_addr_config &= ~ROW_SIZE_MASK;
2813 switch (rdev->config.cik.mem_row_size_in_kb) {
2816 gb_addr_config |= ROW_SIZE(0);
2819 gb_addr_config |= ROW_SIZE(1);
2822 gb_addr_config |= ROW_SIZE(2);
2826 /* setup tiling info dword. gb_addr_config is not adequate since it does
2827 * not have bank info, so create a custom tiling dword.
2828 * bits 3:0 num_pipes
2829 * bits 7:4 num_banks
2830 * bits 11:8 group_size
2831 * bits 15:12 row_size
2833 rdev->config.cik.tile_config = 0;
2834 switch (rdev->config.cik.num_tile_pipes) {
2836 rdev->config.cik.tile_config |= (0 << 0);
2839 rdev->config.cik.tile_config |= (1 << 0);
2842 rdev->config.cik.tile_config |= (2 << 0);
2846 /* XXX what about 12? */
2847 rdev->config.cik.tile_config |= (3 << 0);
2850 rdev->config.cik.tile_config |=
2851 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2852 rdev->config.cik.tile_config |=
2853 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2854 rdev->config.cik.tile_config |=
2855 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
/* broadcast the address config to every block that consumes it */
2857 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2858 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2859 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2860 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2861 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2862 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2863 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2864 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2866 cik_tiling_mode_table_init(rdev);
2868 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2869 rdev->config.cik.max_sh_per_se,
2870 rdev->config.cik.max_backends_per_se);
2872 /* set HW defaults for 3D engine */
2873 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2875 WREG32(SX_DEBUG_1, 0x20);
2877 WREG32(TA_CNTL_AUX, 0x00010000);
2879 tmp = RREG32(SPI_CONFIG_CNTL);
2881 WREG32(SPI_CONFIG_CNTL, tmp);
2883 WREG32(SQ_CONFIG, 1);
2885 WREG32(DB_DEBUG, 0);
2887 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2889 WREG32(DB_DEBUG2, tmp);
2891 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2893 WREG32(DB_DEBUG3, tmp);
2895 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2897 WREG32(CB_HW_CONTROL, tmp);
2899 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2901 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2902 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2903 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2904 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2906 WREG32(VGT_NUM_INSTANCES, 1);
2908 WREG32(CP_PERFMON_CNTL, 0);
2910 WREG32(SQ_CONFIG, 0);
2912 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2913 FORCE_EOV_MAX_REZ_CNT(255)));
2915 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2916 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2918 WREG32(VGT_GS_VERTEX_REUSE, 16);
2919 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2921 tmp = RREG32(HDP_MISC_CNTL);
2922 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2923 WREG32(HDP_MISC_CNTL, tmp);
/* read-modify-write style touch of the HDP host path control */
2925 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2926 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2928 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2929 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2935 * GPU scratch registers helpers function.
2938 * cik_scratch_init - setup driver info for CP scratch regs
2940 * @rdev: radeon_device pointer
2942 * Set up the number and offset of the CP scratch registers.
2943 * NOTE: use of CP scratch registers is a legacy inferface and
2944 * is not used by default on newer asics (r6xx+). On newer asics,
2945 * memory buffers are used for fences rather than scratch regs.
2947 static void cik_scratch_init(struct radeon_device *rdev)
2951 rdev->scratch.num_reg = 7;
2952 rdev->scratch.reg_base = SCRATCH_REG0;
2953 for (i = 0; i < rdev->scratch.num_reg; i++) {
2954 rdev->scratch.free[i] = true;
2955 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2960 * cik_ring_test - basic gfx ring test
2962 * @rdev: radeon_device pointer
2963 * @ring: radeon_ring structure holding ring information
2965 * Allocate a scratch register and write to it using the gfx ring (CIK).
2966 * Provides a basic gfx ring test to verify that the ring is working.
2967 * Used by cik_cp_gfx_resume();
2968 * Returns 0 on success, error on failure.
2970 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
/* NOTE(review): locals, braces, and the error-path "return r" lines appear
 * truncated in this text - verify against upstream radeon cik.c. */
2977 r = radeon_scratch_get(rdev, &scratch);
2979 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
/* seed the scratch reg so we can detect the ring overwriting it */
2982 WREG32(scratch, 0xCAFEDEAD);
2983 r = radeon_ring_lock(rdev, ring, 3);
2985 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2986 radeon_scratch_free(rdev, scratch);
/* emit a 3-dword SET_UCONFIG_REG that writes 0xDEADBEEF to the scratch reg */
2989 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2990 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2991 radeon_ring_write(ring, 0xDEADBEEF);
2992 radeon_ring_unlock_commit(rdev, ring);
/* poll until the CP lands the write or we hit the usec timeout */
2994 for (i = 0; i < rdev->usec_timeout; i++) {
2995 tmp = RREG32(scratch);
2996 if (tmp == 0xDEADBEEF)
3000 if (i < rdev->usec_timeout) {
3001 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3003 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3004 ring->idx, scratch, tmp);
3007 radeon_scratch_free(rdev, scratch);
3012 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3014 * @rdev: radeon_device pointer
3015 * @fence: radeon fence object
3017 * Emits a fence sequence number on the gfx ring and flushes
3020 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3021 struct radeon_fence *fence)
3023 struct radeon_ring *ring = &rdev->ring[fence->ring];
3024 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3026 /* EVENT_WRITE_EOP - flush caches, send int */
3027 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
/* NOTE(review): some flag lines of this packet appear truncated here */
3028 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3030 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3032 radeon_ring_write(ring, addr & 0xfffffffc);
/* DATA_SEL(1) = write the 32-bit fence seq, INT_SEL(2) = irq on write */
3033 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3034 radeon_ring_write(ring, fence->seq);
3035 radeon_ring_write(ring, 0);
3037 /* We should be using the new WAIT_REG_MEM special op packet here
3038 * but it causes the CP to hang
/* HDP flush via a plain register write instead */
3040 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3041 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3042 WRITE_DATA_DST_SEL(0)));
3043 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3044 radeon_ring_write(ring, 0);
3045 radeon_ring_write(ring, 0);
3049 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3051 * @rdev: radeon_device pointer
3052 * @fence: radeon fence object
3054 * Emits a fence sequence number on the compute ring and flushes
3057 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3058 struct radeon_fence *fence)
3060 struct radeon_ring *ring = &rdev->ring[fence->ring];
3061 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3063 /* RELEASE_MEM - flush caches, send int */
3064 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
/* NOTE(review): some flag lines of this packet appear truncated here */
3065 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3067 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL(1) = write the 32-bit fence seq, INT_SEL(2) = irq on write */
3069 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3070 radeon_ring_write(ring, addr & 0xfffffffc);
3071 radeon_ring_write(ring, upper_32_bits(addr));
3072 radeon_ring_write(ring, fence->seq);
3073 radeon_ring_write(ring, 0);
3075 /* We should be using the new WAIT_REG_MEM special op packet here
3076 * but it causes the CP to hang
/* HDP flush via a plain register write instead */
3078 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3080 WRITE_DATA_DST_SEL(0)));
3081 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3082 radeon_ring_write(ring, 0);
3083 radeon_ring_write(ring, 0);
/* Emit a MEM_SEMAPHORE packet that either signals or waits on a GPU
 * semaphore, depending on emit_wait.  NOTE(review): the final parameter
 * line (presumably "bool emit_wait") is not visible in this text. */
3086 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3087 struct radeon_ring *ring,
3088 struct radeon_semaphore *semaphore,
3091 uint64_t addr = semaphore->gpu_addr;
3092 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3094 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3095 radeon_ring_write(ring, addr & 0xffffffff);
/* high 16 address bits share a dword with the signal/wait select */
3096 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3103 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3105 * @rdev: radeon_device pointer
3106 * @ib: radeon indirect buffer object
3108 * Emits an DE (drawing engine) or CE (constant engine) IB
3109 * on the gfx ring. IBs are usually generated by userspace
3110 * acceleration drivers and submitted to the kernel for
3111 * scheduling on the ring. This function schedules the IB
3112 * on the gfx ring for execution by the GPU.
3114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3116 struct radeon_ring *ring = &rdev->ring[ib->ring];
3117 u32 header, control = INDIRECT_BUFFER_VALID;
/* const IBs go to the CE; everything else is a DE indirect buffer.
 * NOTE(review): the else branches and the next_rptr declaration appear
 * truncated in this text. */
3119 if (ib->is_const_ib) {
3120 /* set switch buffer packet before const IB */
3121 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3122 radeon_ring_write(ring, 0);
3124 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
/* record the rptr we expect after this IB, either via a save register
 * or via a writeback write, so lockup detection can use it */
3127 if (ring->rptr_save_reg) {
3128 next_rptr = ring->wptr + 3 + 4;
3129 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3130 radeon_ring_write(ring, ((ring->rptr_save_reg -
3131 PACKET3_SET_UCONFIG_REG_START) >> 2));
3132 radeon_ring_write(ring, next_rptr);
3133 } else if (rdev->wb.enabled) {
3134 next_rptr = ring->wptr + 5 + 4;
3135 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3136 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3137 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3138 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3139 radeon_ring_write(ring, next_rptr);
3142 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* vm id 0 means the system/kernel address space */
3145 control |= ib->length_dw |
3146 (ib->vm ? (ib->vm->id << 24) : 0);
3148 radeon_ring_write(ring, header);
3149 radeon_ring_write(ring,
3153 (ib->gpu_addr & 0xFFFFFFFC));
3154 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3155 radeon_ring_write(ring, control);
3159 * cik_ib_test - basic gfx ring IB test
3161 * @rdev: radeon_device pointer
3162 * @ring: radeon_ring structure holding ring information
3164 * Allocate an IB and execute it on the gfx ring (CIK).
3165 * Provides a basic gfx ring test to verify that IBs are working.
3166 * Returns 0 on success, error on failure.
3168 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3170 struct radeon_ib ib;
/* NOTE(review): locals, braces, and the error-path "return r" lines appear
 * truncated in this text - verify against upstream radeon cik.c. */
3176 r = radeon_scratch_get(rdev, &scratch);
3178 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
/* seed the scratch reg so we can detect the IB overwriting it */
3181 WREG32(scratch, 0xCAFEDEAD);
3182 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3184 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* 3-dword IB: write 0xDEADBEEF into the scratch register */
3187 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3188 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3189 ib.ptr[2] = 0xDEADBEEF;
3191 r = radeon_ib_schedule(rdev, &ib, NULL);
3193 radeon_scratch_free(rdev, scratch);
3194 radeon_ib_free(rdev, &ib);
3195 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* wait for the IB's fence, then poll the scratch reg for the write */
3198 r = radeon_fence_wait(ib.fence, false);
3200 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3203 for (i = 0; i < rdev->usec_timeout; i++) {
3204 tmp = RREG32(scratch);
3205 if (tmp == 0xDEADBEEF)
3209 if (i < rdev->usec_timeout) {
3210 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3212 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3216 radeon_scratch_free(rdev, scratch);
3217 radeon_ib_free(rdev, &ib);
3223 * On CIK, gfx and compute now have independent command processors.
3226 * Gfx consists of a single ring and can process both gfx jobs and
3227 * compute jobs. The gfx CP consists of three microengines (ME):
3228 * PFP - Pre-Fetch Parser
3230 * CE - Constant Engine
3231 * The PFP and ME make up what is considered the Drawing Engine (DE).
3232 * The CE is an asynchronous engine used for updating buffer descriptors
3233 * used by the DE so that they can be loaded into cache in parallel
3234 * while the DE is processing state update packets.
3237 * The compute CP consists of two microengines (ME):
3238 * MEC1 - Compute MicroEngine 1
3239 * MEC2 - Compute MicroEngine 2
3240 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3241 * The queues are exposed to userspace and are programmed directly
3242 * by the compute runtime.
3245 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3247 * @rdev: radeon_device pointer
3248 * @enable: enable or disable the MEs
3250 * Halts or unhalts the gfx MEs.
3252 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3255 WREG32(CP_ME_CNTL, 0);
3257 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3258 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3264 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3266 * @rdev: radeon_device pointer
3268 * Loads the gfx PFP, ME, and CE ucode.
3269 * Returns 0 for success, -EINVAL if the ucode is not available.
3271 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3273 const __be32 *fw_data;
3276 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3279 cik_cp_gfx_enable(rdev, false);
3282 fw_data = (const __be32 *)rdev->pfp_fw->data;
3283 WREG32(CP_PFP_UCODE_ADDR, 0);
3284 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3285 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3286 WREG32(CP_PFP_UCODE_ADDR, 0);
3289 fw_data = (const __be32 *)rdev->ce_fw->data;
3290 WREG32(CP_CE_UCODE_ADDR, 0);
3291 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3292 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3293 WREG32(CP_CE_UCODE_ADDR, 0);
3296 fw_data = (const __be32 *)rdev->me_fw->data;
3297 WREG32(CP_ME_RAM_WADDR, 0);
3298 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3299 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3300 WREG32(CP_ME_RAM_WADDR, 0);
3302 WREG32(CP_PFP_UCODE_ADDR, 0);
3303 WREG32(CP_CE_UCODE_ADDR, 0);
3304 WREG32(CP_ME_RAM_WADDR, 0);
3305 WREG32(CP_ME_RAM_RADDR, 0);
3310 * cik_cp_gfx_start - start the gfx ring
3312 * @rdev: radeon_device pointer
3314 * Enables the ring and loads the clear state context and other
3315 * packets required to init the ring.
3316 * Returns 0 for success, error for failure.
3318 static int cik_cp_gfx_start(struct radeon_device *rdev)
3320 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* NOTE(review): locals, braces, and the error-path/return lines appear
 * truncated in this text - verify against upstream radeon cik.c. */
3324 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3325 WREG32(CP_ENDIAN_SWAP, 0);
3326 WREG32(CP_DEVICE_ID, 1);
3328 cik_cp_gfx_enable(rdev, true);
/* reserve space for the clear state plus the fixed packets below */
3330 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3332 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3336 /* init the CE partitions.  CE only used for gfx on CIK */
3337 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3338 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3339 radeon_ring_write(ring, 0xc000);
3340 radeon_ring_write(ring, 0xc000);
3342 /* setup clear context state */
3343 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3344 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3346 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3347 radeon_ring_write(ring, 0x80000000);
3348 radeon_ring_write(ring, 0x80000000);
/* stream the golden register state from clearstate_ci.h */
3350 for (i = 0; i < cik_default_size; i++)
3351 radeon_ring_write(ring, cik_default_state[i]);
3353 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3354 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3356 /* set clear context state */
3357 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3358 radeon_ring_write(ring, 0);
3360 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3361 radeon_ring_write(ring, 0x00000316);
3362 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3363 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3365 radeon_ring_unlock_commit(rdev, ring);
3371 * cik_cp_gfx_fini - stop the gfx ring
3373 * @rdev: radeon_device pointer
3375 * Stop the gfx ring and tear down the driver ring
3378 static void cik_cp_gfx_fini(struct radeon_device *rdev)
/* halt the gfx MEs, then release the ring's driver-side resources */
3380 cik_cp_gfx_enable(rdev, false);
3381 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3385 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3387 * @rdev: radeon_device pointer
3389 * Program the location and size of the gfx ring buffer
3390 * and test it to make sure it's working.
3391 * Returns 0 for success, error for failure.
3393 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3395 struct radeon_ring *ring;
/* NOTE(review): locals, braces, and the final return lines appear
 * truncated in this text - verify against upstream radeon cik.c. */
3401 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3402 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3404 /* Set the write pointer delay */
3405 WREG32(CP_RB_WPTR_DELAY, 0);
3407 /* set the RB to use vmid 0 */
3408 WREG32(CP_RB_VMID, 0);
3410 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3412 /* ring 0 - compute and gfx */
3413 /* Set ring buffer size */
3414 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3415 rb_bufsz = order_base_2(ring->ring_size / 8);
3416 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3418 tmp |= BUF_SWAP_32BIT;
3420 WREG32(CP_RB0_CNTL, tmp);
3422 /* Initialize the ring buffer's read and write pointers */
3423 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3425 WREG32(CP_RB0_WPTR, ring->wptr);
3427 /* set the wb address wether it's enabled or not */
3428 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3429 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3431 /* scratch register shadowing is no longer supported */
3432 WREG32(SCRATCH_UMSK, 0);
3434 if (!rdev->wb.enabled)
3435 tmp |= RB_NO_UPDATE;
3438 WREG32(CP_RB0_CNTL, tmp);
/* ring buffer base address is in units of 256 bytes */
3440 rb_addr = ring->gpu_addr >> 8;
3441 WREG32(CP_RB0_BASE, rb_addr);
3442 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3444 ring->rptr = RREG32(CP_RB0_RPTR);
3446 /* start the ring */
3447 cik_cp_gfx_start(rdev);
3448 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3449 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3451 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* Fetch the compute ring's read pointer: from the writeback page when
 * writeback is enabled, otherwise directly from the hardware queue
 * descriptor (which requires selecting the queue via SRBM). */
3457 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3458 struct radeon_ring *ring)
3464 if (rdev->wb.enabled) {
3465 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
/* srbm_mutex serialises SRBM queue selection between callers */
3467 mutex_lock(&rdev->srbm_mutex);
3468 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3469 rptr = RREG32(CP_HQD_PQ_RPTR);
3470 cik_srbm_select(rdev, 0, 0, 0, 0);
3471 mutex_unlock(&rdev->srbm_mutex);
/* Fetch the compute ring's write pointer: from the writeback page when
 * writeback is enabled, otherwise from the hardware queue descriptor
 * (selected via SRBM, serialised by srbm_mutex). */
3477 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3478 struct radeon_ring *ring)
3482 if (rdev->wb.enabled) {
3483 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3485 mutex_lock(&rdev->srbm_mutex);
3486 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3487 wptr = RREG32(CP_HQD_PQ_WPTR);
3488 cik_srbm_select(rdev, 0, 0, 0, 0);
3489 mutex_unlock(&rdev->srbm_mutex);
/* Publish the compute ring's new write pointer: mirror it into the
 * writeback page and ring the queue's doorbell to notify the CP. */
3495 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3496 struct radeon_ring *ring)
3498 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3499 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3503 * cik_cp_compute_enable - enable/disable the compute CP MEs
3505 * @rdev: radeon_device pointer
3506 * @enable: enable or disable the MEs
3508 * Halts or unhalts the compute MEs.
3510 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3513 WREG32(CP_MEC_CNTL, 0);
3515 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3520 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3522 * @rdev: radeon_device pointer
3524 * Loads the compute MEC1&2 ucode.
3525 * Returns 0 for success, -EINVAL if the ucode is not available.
3527 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3529 const __be32 *fw_data;
/* NOTE(review): the mec_fw availability check, braces, and return lines
 * appear truncated in this text - verify against upstream radeon cik.c. */
3535 cik_cp_compute_enable(rdev, false);
/* MEC1 ucode */
3538 fw_data = (const __be32 *)rdev->mec_fw->data;
3539 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3540 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3541 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3542 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
/* only Kaveri has a second MEC; it reuses the same ucode image */
3544 if (rdev->family == CHIP_KAVERI) {
3546 fw_data = (const __be32 *)rdev->mec_fw->data;
3547 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3548 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3549 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3550 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3557 * cik_cp_compute_start - start the compute queues
3559 * @rdev: radeon_device pointer
3561 * Enable the compute queues.
3562 * Returns 0 for success, error for failure.
3564 static int cik_cp_compute_start(struct radeon_device *rdev)
3566 cik_cp_compute_enable(rdev, true);
3572 * cik_cp_compute_fini - stop the compute queues
3574 * @rdev: radeon_device pointer
3576 * Stop the compute queues and tear down the driver queue
3579 static void cik_cp_compute_fini(struct radeon_device *rdev)
3583 cik_cp_compute_enable(rdev, false);
3585 for (i = 0; i < 2; i++) {
3587 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3589 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3591 if (rdev->ring[idx].mqd_obj) {
3592 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3593 if (unlikely(r != 0))
3594 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3596 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3597 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3599 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3600 rdev->ring[idx].mqd_obj = NULL;
3605 static void cik_mec_fini(struct radeon_device *rdev)
3609 if (rdev->mec.hpd_eop_obj) {
3610 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3611 if (unlikely(r != 0))
3612 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3613 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3614 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3616 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3617 rdev->mec.hpd_eop_obj = NULL;
3621 #define MEC_HPD_SIZE 2048
3623 static int cik_mec_init(struct radeon_device *rdev)
3629 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3630 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3632 if (rdev->family == CHIP_KAVERI)
3633 rdev->mec.num_mec = 2;
3635 rdev->mec.num_mec = 1;
3636 rdev->mec.num_pipe = 4;
3637 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3639 if (rdev->mec.hpd_eop_obj == NULL) {
3640 r = radeon_bo_create(rdev,
3641 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3643 RADEON_GEM_DOMAIN_GTT, NULL,
3644 &rdev->mec.hpd_eop_obj);
3646 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3651 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3652 if (unlikely(r != 0)) {
3656 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3657 &rdev->mec.hpd_eop_gpu_addr);
3659 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3663 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3665 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3670 /* clear memory. Not sure if this is required or not */
3671 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3673 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3674 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3679 struct hqd_registers
3681 u32 cp_mqd_base_addr;
3682 u32 cp_mqd_base_addr_hi;
3685 u32 cp_hqd_persistent_state;
3686 u32 cp_hqd_pipe_priority;
3687 u32 cp_hqd_queue_priority;
3690 u32 cp_hqd_pq_base_hi;
3692 u32 cp_hqd_pq_rptr_report_addr;
3693 u32 cp_hqd_pq_rptr_report_addr_hi;
3694 u32 cp_hqd_pq_wptr_poll_addr;
3695 u32 cp_hqd_pq_wptr_poll_addr_hi;
3696 u32 cp_hqd_pq_doorbell_control;
3698 u32 cp_hqd_pq_control;
3699 u32 cp_hqd_ib_base_addr;
3700 u32 cp_hqd_ib_base_addr_hi;
3702 u32 cp_hqd_ib_control;
3703 u32 cp_hqd_iq_timer;
3705 u32 cp_hqd_dequeue_request;
3706 u32 cp_hqd_dma_offload;
3707 u32 cp_hqd_sema_cmd;
3708 u32 cp_hqd_msg_type;
3709 u32 cp_hqd_atomic0_preop_lo;
3710 u32 cp_hqd_atomic0_preop_hi;
3711 u32 cp_hqd_atomic1_preop_lo;
3712 u32 cp_hqd_atomic1_preop_hi;
3713 u32 cp_hqd_hq_scheduler0;
3714 u32 cp_hqd_hq_scheduler1;
3721 u32 dispatch_initiator;
3725 u32 pipeline_stat_enable;
3726 u32 perf_counter_enable;
3732 u32 resource_limits;
3733 u32 static_thread_mgmt01[2];
3735 u32 static_thread_mgmt23[2];
3737 u32 thread_trace_enable;
3740 u32 vgtcs_invoke_count[2];
3741 struct hqd_registers queue_state;
3743 u32 interrupt_queue[64];
3747 * cik_cp_compute_resume - setup the compute queue registers
3749 * @rdev: radeon_device pointer
3751 * Program the compute queues and test them to make sure they
3753 * Returns 0 for success, error for failure.
3755 static int cik_cp_compute_resume(struct radeon_device *rdev)
3759 bool use_doorbell = true;
3765 struct bonaire_mqd *mqd;
3767 r = cik_cp_compute_start(rdev);
3771 /* fix up chicken bits */
3772 tmp = RREG32(CP_CPF_DEBUG);
3774 WREG32(CP_CPF_DEBUG, tmp);
3776 /* init the pipes */
3777 mutex_lock(&rdev->srbm_mutex);
3778 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3779 int me = (i < 4) ? 1 : 2;
3780 int pipe = (i < 4) ? i : (i - 4);
3782 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3784 cik_srbm_select(rdev, me, pipe, 0, 0);
3786 /* write the EOP addr */
3787 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3788 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3790 /* set the VMID assigned */
3791 WREG32(CP_HPD_EOP_VMID, 0);
3793 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3794 tmp = RREG32(CP_HPD_EOP_CONTROL);
3795 tmp &= ~EOP_SIZE_MASK;
3796 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3797 WREG32(CP_HPD_EOP_CONTROL, tmp);
3799 cik_srbm_select(rdev, 0, 0, 0, 0);
3800 mutex_unlock(&rdev->srbm_mutex);
3802 /* init the queues. Just two for now. */
3803 for (i = 0; i < 2; i++) {
3805 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3807 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3809 if (rdev->ring[idx].mqd_obj == NULL) {
3810 r = radeon_bo_create(rdev,
3811 sizeof(struct bonaire_mqd),
3813 RADEON_GEM_DOMAIN_GTT, NULL,
3814 &rdev->ring[idx].mqd_obj);
3816 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3821 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3822 if (unlikely(r != 0)) {
3823 cik_cp_compute_fini(rdev);
3826 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3829 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3830 cik_cp_compute_fini(rdev);
3833 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3835 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3836 cik_cp_compute_fini(rdev);
3840 /* doorbell offset */
3841 rdev->ring[idx].doorbell_offset =
3842 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3844 /* init the mqd struct */
3845 memset(buf, 0, sizeof(struct bonaire_mqd));
3847 mqd = (struct bonaire_mqd *)buf;
3848 mqd->header = 0xC0310800;
3849 mqd->static_thread_mgmt01[0] = 0xffffffff;
3850 mqd->static_thread_mgmt01[1] = 0xffffffff;
3851 mqd->static_thread_mgmt23[0] = 0xffffffff;
3852 mqd->static_thread_mgmt23[1] = 0xffffffff;
3854 mutex_lock(&rdev->srbm_mutex);
3855 cik_srbm_select(rdev, rdev->ring[idx].me,
3856 rdev->ring[idx].pipe,
3857 rdev->ring[idx].queue, 0);
3859 /* disable wptr polling */
3860 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3861 tmp &= ~WPTR_POLL_EN;
3862 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3864 /* enable doorbell? */
3865 mqd->queue_state.cp_hqd_pq_doorbell_control =
3866 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3868 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3870 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3871 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3872 mqd->queue_state.cp_hqd_pq_doorbell_control);
3874 /* disable the queue if it's active */
3875 mqd->queue_state.cp_hqd_dequeue_request = 0;
3876 mqd->queue_state.cp_hqd_pq_rptr = 0;
3877 mqd->queue_state.cp_hqd_pq_wptr= 0;
3878 if (RREG32(CP_HQD_ACTIVE) & 1) {
3879 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3880 for (i = 0; i < rdev->usec_timeout; i++) {
3881 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3885 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3886 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3887 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3890 /* set the pointer to the MQD */
3891 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3892 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3893 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3894 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3895 /* set MQD vmid to 0 */
3896 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3897 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3898 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3900 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3901 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3902 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3903 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3904 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3905 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3907 /* set up the HQD, this is similar to CP_RB0_CNTL */
3908 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3909 mqd->queue_state.cp_hqd_pq_control &=
3910 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3912 mqd->queue_state.cp_hqd_pq_control |=
3913 order_base_2(rdev->ring[idx].ring_size / 8);
3914 mqd->queue_state.cp_hqd_pq_control |=
3915 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3917 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3919 mqd->queue_state.cp_hqd_pq_control &=
3920 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3921 mqd->queue_state.cp_hqd_pq_control |=
3922 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3923 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3925 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3927 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3929 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3930 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3931 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3932 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3933 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3934 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3936 /* set the wb address wether it's enabled or not */
3938 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3940 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3941 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3942 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3943 upper_32_bits(wb_gpu_addr) & 0xffff;
3944 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3945 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3946 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3947 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3949 /* enable the doorbell if requested */
3951 mqd->queue_state.cp_hqd_pq_doorbell_control =
3952 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3953 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3954 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3955 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3956 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3957 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3958 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3961 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3963 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3964 mqd->queue_state.cp_hqd_pq_doorbell_control);
3966 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3967 rdev->ring[idx].wptr = 0;
3968 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3969 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3970 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3971 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3973 /* set the vmid for the queue */
3974 mqd->queue_state.cp_hqd_vmid = 0;
3975 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3977 /* activate the queue */
3978 mqd->queue_state.cp_hqd_active = 1;
3979 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3981 cik_srbm_select(rdev, 0, 0, 0, 0);
3982 mutex_unlock(&rdev->srbm_mutex);
3984 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3985 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3987 rdev->ring[idx].ready = true;
3988 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3990 rdev->ring[idx].ready = false;
3996 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3998 cik_cp_gfx_enable(rdev, enable);
3999 cik_cp_compute_enable(rdev, enable);
/*
 * Load ucode for both the gfx and compute CPs.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
/* Tear down both the gfx and compute CP state. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4022 static int cik_cp_resume(struct radeon_device *rdev)
4026 cik_enable_gui_idle_interrupt(rdev, false);
4028 r = cik_cp_load_microcode(rdev);
4032 r = cik_cp_gfx_resume(rdev);
4035 r = cik_cp_compute_resume(rdev);
4039 cik_enable_gui_idle_interrupt(rdev, true);
4044 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4046 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4047 RREG32(GRBM_STATUS));
4048 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4049 RREG32(GRBM_STATUS2));
4050 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4051 RREG32(GRBM_STATUS_SE0));
4052 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4053 RREG32(GRBM_STATUS_SE1));
4054 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4055 RREG32(GRBM_STATUS_SE2));
4056 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4057 RREG32(GRBM_STATUS_SE3));
4058 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4059 RREG32(SRBM_STATUS));
4060 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4061 RREG32(SRBM_STATUS2));
4062 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4063 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4064 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4065 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4066 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4067 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4068 RREG32(CP_STALLED_STAT1));
4069 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4070 RREG32(CP_STALLED_STAT2));
4071 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4072 RREG32(CP_STALLED_STAT3));
4073 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4074 RREG32(CP_CPF_BUSY_STAT));
4075 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4076 RREG32(CP_CPF_STALLED_STAT1));
4077 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4078 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4079 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4080 RREG32(CP_CPC_STALLED_STAT1));
4081 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4085 * cik_gpu_check_soft_reset - check which blocks are busy
4087 * @rdev: radeon_device pointer
4089 * Check which blocks are busy and return the relevant reset
4090 * mask to be used by cik_gpu_soft_reset().
4091 * Returns a mask of the blocks to be reset.
4093 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4099 tmp = RREG32(GRBM_STATUS);
4100 if (tmp & (PA_BUSY | SC_BUSY |
4101 BCI_BUSY | SX_BUSY |
4102 TA_BUSY | VGT_BUSY |
4104 GDS_BUSY | SPI_BUSY |
4105 IA_BUSY | IA_BUSY_NO_DMA))
4106 reset_mask |= RADEON_RESET_GFX;
4108 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4109 reset_mask |= RADEON_RESET_CP;
4112 tmp = RREG32(GRBM_STATUS2);
4114 reset_mask |= RADEON_RESET_RLC;
4116 /* SDMA0_STATUS_REG */
4117 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4118 if (!(tmp & SDMA_IDLE))
4119 reset_mask |= RADEON_RESET_DMA;
4121 /* SDMA1_STATUS_REG */
4122 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4123 if (!(tmp & SDMA_IDLE))
4124 reset_mask |= RADEON_RESET_DMA1;
4127 tmp = RREG32(SRBM_STATUS2);
4128 if (tmp & SDMA_BUSY)
4129 reset_mask |= RADEON_RESET_DMA;
4131 if (tmp & SDMA1_BUSY)
4132 reset_mask |= RADEON_RESET_DMA1;
4135 tmp = RREG32(SRBM_STATUS);
4138 reset_mask |= RADEON_RESET_IH;
4141 reset_mask |= RADEON_RESET_SEM;
4143 if (tmp & GRBM_RQ_PENDING)
4144 reset_mask |= RADEON_RESET_GRBM;
4147 reset_mask |= RADEON_RESET_VMC;
4149 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4150 MCC_BUSY | MCD_BUSY))
4151 reset_mask |= RADEON_RESET_MC;
4153 if (evergreen_is_display_hung(rdev))
4154 reset_mask |= RADEON_RESET_DISPLAY;
4156 /* Skip MC reset as it's mostly likely not hung, just busy */
4157 if (reset_mask & RADEON_RESET_MC) {
4158 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4159 reset_mask &= ~RADEON_RESET_MC;
4166 * cik_gpu_soft_reset - soft reset GPU
4168 * @rdev: radeon_device pointer
4169 * @reset_mask: mask of which blocks to reset
4171 * Soft reset the blocks specified in @reset_mask.
4173 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4175 struct evergreen_mc_save save;
4176 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4179 if (reset_mask == 0)
4182 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4184 cik_print_gpu_status_regs(rdev);
4185 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4186 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4187 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4188 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4197 /* Disable GFX parsing/prefetching */
4198 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4200 /* Disable MEC parsing/prefetching */
4201 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4203 if (reset_mask & RADEON_RESET_DMA) {
4205 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4207 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4209 if (reset_mask & RADEON_RESET_DMA1) {
4211 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4213 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4216 evergreen_mc_stop(rdev, &save);
4217 if (evergreen_mc_wait_for_idle(rdev)) {
4218 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4221 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4222 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4224 if (reset_mask & RADEON_RESET_CP) {
4225 grbm_soft_reset |= SOFT_RESET_CP;
4227 srbm_soft_reset |= SOFT_RESET_GRBM;
4230 if (reset_mask & RADEON_RESET_DMA)
4231 srbm_soft_reset |= SOFT_RESET_SDMA;
4233 if (reset_mask & RADEON_RESET_DMA1)
4234 srbm_soft_reset |= SOFT_RESET_SDMA1;
4236 if (reset_mask & RADEON_RESET_DISPLAY)
4237 srbm_soft_reset |= SOFT_RESET_DC;
4239 if (reset_mask & RADEON_RESET_RLC)
4240 grbm_soft_reset |= SOFT_RESET_RLC;
4242 if (reset_mask & RADEON_RESET_SEM)
4243 srbm_soft_reset |= SOFT_RESET_SEM;
4245 if (reset_mask & RADEON_RESET_IH)
4246 srbm_soft_reset |= SOFT_RESET_IH;
4248 if (reset_mask & RADEON_RESET_GRBM)
4249 srbm_soft_reset |= SOFT_RESET_GRBM;
4251 if (reset_mask & RADEON_RESET_VMC)
4252 srbm_soft_reset |= SOFT_RESET_VMC;
4254 if (!(rdev->flags & RADEON_IS_IGP)) {
4255 if (reset_mask & RADEON_RESET_MC)
4256 srbm_soft_reset |= SOFT_RESET_MC;
4259 if (grbm_soft_reset) {
4260 tmp = RREG32(GRBM_SOFT_RESET);
4261 tmp |= grbm_soft_reset;
4262 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4263 WREG32(GRBM_SOFT_RESET, tmp);
4264 tmp = RREG32(GRBM_SOFT_RESET);
4268 tmp &= ~grbm_soft_reset;
4269 WREG32(GRBM_SOFT_RESET, tmp);
4270 tmp = RREG32(GRBM_SOFT_RESET);
4273 if (srbm_soft_reset) {
4274 tmp = RREG32(SRBM_SOFT_RESET);
4275 tmp |= srbm_soft_reset;
4276 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4277 WREG32(SRBM_SOFT_RESET, tmp);
4278 tmp = RREG32(SRBM_SOFT_RESET);
4282 tmp &= ~srbm_soft_reset;
4283 WREG32(SRBM_SOFT_RESET, tmp);
4284 tmp = RREG32(SRBM_SOFT_RESET);
4287 /* Wait a little for things to settle down */
4290 evergreen_mc_resume(rdev, &save);
4293 cik_print_gpu_status_regs(rdev);
4297 * cik_asic_reset - soft reset GPU
4299 * @rdev: radeon_device pointer
4301 * Look up which blocks are hung and attempt
4303 * Returns 0 for success.
4305 int cik_asic_reset(struct radeon_device *rdev)
4309 reset_mask = cik_gpu_check_soft_reset(rdev);
4312 r600_set_bios_scratch_engine_hung(rdev, true);
4314 cik_gpu_soft_reset(rdev, reset_mask);
4316 reset_mask = cik_gpu_check_soft_reset(rdev);
4319 r600_set_bios_scratch_engine_hung(rdev, false);
4325 * cik_gfx_is_lockup - check if the 3D engine is locked up
4327 * @rdev: radeon_device pointer
4328 * @ring: radeon_ring structure holding ring information
4330 * Check if the 3D engine is locked up (CIK).
4331 * Returns true if the engine is locked, false if not.
4333 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4335 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4337 if (!(reset_mask & (RADEON_RESET_GFX |
4338 RADEON_RESET_COMPUTE |
4339 RADEON_RESET_CP))) {
4340 radeon_ring_lockup_update(ring);
4343 /* force CP activities */
4344 radeon_ring_force_activity(rdev, ring);
4345 return radeon_ring_test_lockup(rdev, ring);
4350 * cik_mc_program - program the GPU memory controller
4352 * @rdev: radeon_device pointer
4354 * Set the location of vram, gart, and AGP in the GPU's
4355 * physical address space (CIK).
4357 static void cik_mc_program(struct radeon_device *rdev)
4359 struct evergreen_mc_save save;
4363 /* Initialize HDP */
4364 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4365 WREG32((0x2c14 + j), 0x00000000);
4366 WREG32((0x2c18 + j), 0x00000000);
4367 WREG32((0x2c1c + j), 0x00000000);
4368 WREG32((0x2c20 + j), 0x00000000);
4369 WREG32((0x2c24 + j), 0x00000000);
4371 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4373 evergreen_mc_stop(rdev, &save);
4374 if (radeon_mc_wait_for_idle(rdev)) {
4375 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4377 /* Lockout access through VGA aperture*/
4378 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4379 /* Update configuration */
4380 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4381 rdev->mc.vram_start >> 12);
4382 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4383 rdev->mc.vram_end >> 12);
4384 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4385 rdev->vram_scratch.gpu_addr >> 12);
4386 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4387 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4388 WREG32(MC_VM_FB_LOCATION, tmp);
4389 /* XXX double check these! */
4390 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4391 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4392 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4393 WREG32(MC_VM_AGP_BASE, 0);
4394 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4395 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4396 if (radeon_mc_wait_for_idle(rdev)) {
4397 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4399 evergreen_mc_resume(rdev, &save);
4400 /* we need to own VRAM, so turn off the VGA renderer here
4401 * to stop it overwriting our objects */
4402 rv515_vga_render_disable(rdev);
4406 * cik_mc_init - initialize the memory controller driver params
4408 * @rdev: radeon_device pointer
4410 * Look up the amount of vram, vram width, and decide how to place
4411 * vram and gart within the GPU's physical address space (CIK).
4412 * Returns 0 for success.
4414 static int cik_mc_init(struct radeon_device *rdev)
4417 int chansize, numchan;
4419 /* Get VRAM informations */
4420 rdev->mc.vram_is_ddr = true;
4421 tmp = RREG32(MC_ARB_RAMCFG);
4422 if (tmp & CHANSIZE_MASK) {
4427 tmp = RREG32(MC_SHARED_CHMAP);
4428 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4458 rdev->mc.vram_width = numchan * chansize;
4459 /* Could aper size report 0 ? */
4460 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4461 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4462 /* size in MB on si */
4463 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4464 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4465 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4466 si_vram_gtt_location(rdev, &rdev->mc);
4467 radeon_update_bandwidth_info(rdev);
4474 * VMID 0 is the physical GPU addresses as used by the kernel.
4475 * VMIDs 1-15 are used for userspace clients and are handled
4476 * by the radeon vm/hsa code.
4479 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4481 * @rdev: radeon_device pointer
4483 * Flush the TLB for the VMID 0 page table (CIK).
4485 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4487 /* flush hdp cache */
4488 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4490 /* bits 0-15 are the VM contexts0-15 */
4491 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4495 * cik_pcie_gart_enable - gart enable
4497 * @rdev: radeon_device pointer
4499 * This sets up the TLBs, programs the page tables for VMID0,
4500 * sets up the hw for VMIDs 1-15 which are allocated on
4501 * demand, and sets up the global locations for the LDS, GDS,
4502 * and GPUVM for FSA64 clients (CIK).
4503 * Returns 0 for success, errors for failure.
4505 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4509 if (rdev->gart.robj == NULL) {
4510 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4513 r = radeon_gart_table_vram_pin(rdev);
4516 radeon_gart_restore(rdev);
4517 /* Setup TLB control */
4518 WREG32(MC_VM_MX_L1_TLB_CNTL,
4521 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4522 ENABLE_ADVANCED_DRIVER_MODEL |
4523 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4524 /* Setup L2 cache */
4525 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4526 ENABLE_L2_FRAGMENT_PROCESSING |
4527 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4528 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4529 EFFECTIVE_L2_QUEUE_SIZE(7) |
4530 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4531 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4532 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4533 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4534 /* setup context0 */
4535 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4536 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4537 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4538 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4539 (u32)(rdev->dummy_page.addr >> 12));
4540 WREG32(VM_CONTEXT0_CNTL2, 0);
4541 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4542 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4548 /* empty context1-15 */
4549 /* FIXME start with 4G, once using 2 level pt switch to full
4552 /* set vm size, must be a multiple of 4 */
4553 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4554 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4555 for (i = 1; i < 16; i++) {
4557 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4558 rdev->gart.table_addr >> 12);
4560 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4561 rdev->gart.table_addr >> 12);
4564 /* enable context1-15 */
4565 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4566 (u32)(rdev->dummy_page.addr >> 12));
4567 WREG32(VM_CONTEXT1_CNTL2, 4);
4568 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4569 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4570 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4571 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4572 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4573 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4574 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4575 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4576 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4577 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4578 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4579 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4580 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4582 /* TC cache setup ??? */
4583 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4584 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4585 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4587 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4588 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4589 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4590 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4591 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4593 WREG32(TC_CFG_L1_VOLATILE, 0);
4594 WREG32(TC_CFG_L2_VOLATILE, 0);
4596 if (rdev->family == CHIP_KAVERI) {
4597 u32 tmp = RREG32(CHUB_CONTROL);
4599 WREG32(CHUB_CONTROL, tmp);
4602 /* XXX SH_MEM regs */
4603 /* where to put LDS, scratch, GPUVM in FSA64 space */
4604 mutex_lock(&rdev->srbm_mutex);
4605 for (i = 0; i < 16; i++) {
4606 cik_srbm_select(rdev, 0, 0, 0, i);
4607 /* CP and shaders */
4608 WREG32(SH_MEM_CONFIG, 0);
4609 WREG32(SH_MEM_APE1_BASE, 1);
4610 WREG32(SH_MEM_APE1_LIMIT, 0);
4611 WREG32(SH_MEM_BASES, 0);
4613 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4614 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4615 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4616 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4617 /* XXX SDMA RLC - todo */
4619 cik_srbm_select(rdev, 0, 0, 0, 0);
4620 mutex_unlock(&rdev->srbm_mutex);
4622 cik_pcie_gart_tlb_flush(rdev);
4623 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4624 (unsigned)(rdev->mc.gtt_size >> 20),
4625 (unsigned long long)rdev->gart.table_addr);
4626 rdev->gart.ready = true;
4631 * cik_pcie_gart_disable - gart disable
4633 * @rdev: radeon_device pointer
4635 * This disables all VM page table (CIK).
4637 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4639 /* Disable all tables */
4640 WREG32(VM_CONTEXT0_CNTL, 0);
4641 WREG32(VM_CONTEXT1_CNTL, 0);
4642 /* Setup TLB control */
4643 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4644 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4645 /* Setup L2 cache */
4647 ENABLE_L2_FRAGMENT_PROCESSING |
4648 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4649 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4650 EFFECTIVE_L2_QUEUE_SIZE(7) |
4651 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4652 WREG32(VM_L2_CNTL2, 0);
4653 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4654 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4655 radeon_gart_table_vram_unpin(rdev);
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4688 * VMID 0 is the physical GPU addresses as used by the kernel.
4689 * VMIDs 1-15 are used for userspace clients and are handled
4690 * by the radeon vm/hsa code.
4693 * cik_vm_init - cik vm init callback
4695 * @rdev: radeon_device pointer
4697 * Inits cik specific vm parameters (number of VMs, base of vram for
4698 * VMIDs 1-15) (CIK).
4699 * Returns 0 for success.
4701 int cik_vm_init(struct radeon_device *rdev)
4704 rdev->vm_manager.nvm = 16;
4705 /* base offset of vram pages */
4706 if (rdev->flags & RADEON_IS_IGP) {
4707 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4709 rdev->vm_manager.vram_base_offset = tmp;
4711 rdev->vm_manager.vram_base_offset = 0;
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
4728 * cik_vm_decode_fault - print human readable fault info
4730 * @rdev: radeon_device pointer
4731 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4732 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4734 * Print human readable fault information (CIK).
4736 static void cik_vm_decode_fault(struct radeon_device *rdev,
4737 u32 status, u32 addr, u32 mc_client)
4739 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4740 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4741 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4742 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4743 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4745 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4746 protections, vmid, addr,
4747 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4748 block, mc_client, mc_id);
4752 * cik_vm_flush - cik vm flush using the CP
4754 * @rdev: radeon_device pointer
4756 * Update the page table base and flush the VM TLB
4757 * using the CP (CIK).
4759 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4761 struct radeon_ring *ring = &rdev->ring[ridx];
/* Write the VM's page directory base into the per-VMID page table base
 * register. VMIDs 0-7 and 8-15 live in two separate register banks
 * (CONTEXT0... vs CONTEXT8...); the branch selecting between the two
 * writes below is not visible in this fragment. */
4766 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4767 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4768 WRITE_DATA_DST_SEL(0)));
4770 radeon_ring_write(ring,
4771 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4773 radeon_ring_write(ring,
4774 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4776 radeon_ring_write(ring, 0);
4777 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4779 /* update SH_MEM_* regs */
/* Select this VMID in SRBM_GFX_CNTL so the SH_MEM_* writes below land in
 * the per-VMID register bank. */
4780 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4781 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4782 WRITE_DATA_DST_SEL(0)));
4783 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4784 radeon_ring_write(ring, 0);
4785 radeon_ring_write(ring, VMID(vm->id));
/* Program the four consecutive SH_MEM_* registers for this VMID. */
4787 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4788 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4789 WRITE_DATA_DST_SEL(0)));
4790 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4791 radeon_ring_write(ring, 0);
4793 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4794 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4795 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4796 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* Restore SRBM_GFX_CNTL to VMID 0 so later register writes are not
 * redirected. */
4798 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4799 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4800 WRITE_DATA_DST_SEL(0)));
4801 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4802 radeon_ring_write(ring, 0);
4803 radeon_ring_write(ring, VMID(0));
/* HDP flush workaround: */
4806 /* We should be using the WAIT_REG_MEM packet here like in
4807 * cik_fence_ring_emit(), but it causes the CP to hang in this
4810 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4811 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4812 WRITE_DATA_DST_SEL(0)));
4813 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4814 radeon_ring_write(ring, 0);
4815 radeon_ring_write(ring, 0);
/* Finally invalidate the TLB for just this VM context. */
4817 /* bits 0-15 are the VM contexts0-15 */
4818 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4819 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4820 WRITE_DATA_DST_SEL(0)));
4821 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4822 radeon_ring_write(ring, 0);
4823 radeon_ring_write(ring, 1 << vm->id);
4825 /* compute doesn't have PFP */
4826 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4827 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4828 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4829 radeon_ring_write(ring, 0x0);
4834 * cik_vm_set_page - update the page tables using sDMA
4836 * @rdev: radeon_device pointer
4837 * @ib: indirect buffer to fill with commands
4838 * @pe: addr of the page entry
4839 * @addr: dst addr to write into pe
4840 * @count: number of page entries to update
4841 * @incr: increase next addr by incr bytes
4842 * @flags: access flags
4844 * Update the page tables using CP or sDMA (CIK).
4846 void cik_vm_set_page(struct radeon_device *rdev,
4847 struct radeon_ib *ib,
4849 uint64_t addr, unsigned count,
4850 uint32_t incr, uint32_t flags)
/* Translate generic RADEON_VM_PAGE_* flags into hw PTE bits. */
4852 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
/* CP path: the page-table update ring is the GFX ring, so build
 * WRITE_DATA packets directly into the IB. */
4856 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
/* 2 header dwords plus 2 dwords (lo/hi) per 64-bit PTE. */
4859 ndw = 2 + count * 2;
4863 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4864 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4865 WRITE_DATA_DST_SEL(1));
4866 ib->ptr[ib->length_dw++] = pe;
4867 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4868 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4869 if (flags & RADEON_VM_PAGE_SYSTEM) {
/* System pages go through the GART; mask to a page-aligned address. */
4870 value = radeon_vm_map_gart(rdev, addr);
4871 value &= 0xFFFFFFFFFFFFF000ULL;
4872 } else if (flags & RADEON_VM_PAGE_VALID) {
4878 value |= r600_flags;
4879 ib->ptr[ib->length_dw++] = value;
4880 ib->ptr[ib->length_dw++] = upper_32_bits(value);
/* Otherwise (else branch not visible) delegate to the sDMA engine. */
4885 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4891 * The RLC is a multi-purpose microengine that handles a
4892 * variety of functions, the most important of which is
4893 * the interrupt controller.
/* Enable/disable the GUI-idle (context busy/empty) interrupts on the
 * gfx ring's CP interrupt control register via read-modify-write. */
4895 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4898 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4901 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4903 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4904 WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load balancing (LBPW) via the LOAD_BALANCE_ENABLE bit in
 * RLC_LB_CNTL (read-modify-write). */
4907 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4911 tmp = RREG32(RLC_LB_CNTL);
4913 tmp |= LOAD_BALANCE_ENABLE;
4915 tmp &= ~LOAD_BALANCE_ENABLE;
4916 WREG32(RLC_LB_CNTL, tmp);
/* Poll until the RLC serdes master-busy bits clear, first per SE/SH
 * (CU masters), then globally (non-CU masters). Each poll is bounded by
 * rdev->usec_timeout iterations; delay between polls is not visible here. */
4919 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4924 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4925 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4926 cik_select_se_sh(rdev, i, j);
4927 for (k = 0; k < rdev->usec_timeout; k++) {
4928 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* Restore broadcast (all SE/SH) register access. */
4934 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4936 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4937 for (k = 0; k < rdev->usec_timeout; k++) {
4938 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write a previously saved RLC_CNTL value back (used to restore the RLC
 * state captured by cik_halt_rlc()); the condition guarding the write is
 * not visible in this fragment. */
4944 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4948 tmp = RREG32(RLC_CNTL);
4950 WREG32(RLC_CNTL, rlc);
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc(). */
4953 static u32 cik_halt_rlc(struct radeon_device *rdev)
4957 orig = data = RREG32(RLC_CNTL);
4959 if (data & RLC_ENABLE) {
4962 data &= ~RLC_ENABLE;
4963 WREG32(RLC_CNTL, data);
/* Wait for the RLC GPM to report idle, bounded by usec_timeout. */
4965 for (i = 0; i < rdev->usec_timeout; i++) {
4966 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4971 cik_wait_for_rlc_serdes(rdev);
/* Request RLC safe mode: send MSG_ENTER_RLC_SAFE_MODE through
 * RLC_GPR_REG2, wait for the GFX power/clock status bits to assert,
 * then wait for the REQ bit to clear (request acknowledged).
 * Both waits are bounded by rdev->usec_timeout. */
4977 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4981 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4982 WREG32(RLC_GPR_REG2, tmp);
4984 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4985 for (i = 0; i < rdev->usec_timeout; i++) {
4986 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4991 for (i = 0; i < rdev->usec_timeout; i++) {
4992 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Leave RLC safe mode by sending MSG_EXIT_RLC_SAFE_MODE; unlike entry,
 * no acknowledgement polling is visible here. */
4998 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5002 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5003 WREG32(RLC_GPR_REG2, tmp);
5007 * cik_rlc_stop - stop the RLC ME
5009 * @rdev: radeon_device pointer
5011 * Halt the RLC ME (MicroEngine) (CIK).
5013 static void cik_rlc_stop(struct radeon_device *rdev)
/* Clear RLC_CNTL (drops RLC_ENABLE), mask GUI-idle interrupts, then
 * wait for the serdes masters to go idle. */
5015 WREG32(RLC_CNTL, 0);
5017 cik_enable_gui_idle_interrupt(rdev, false);
5019 cik_wait_for_rlc_serdes(rdev);
5023 * cik_rlc_start - start the RLC ME
5025 * @rdev: radeon_device pointer
5027 * Unhalt the RLC ME (MicroEngine) (CIK).
5029 static void cik_rlc_start(struct radeon_device *rdev)
/* Set RLC_ENABLE and re-enable the GUI-idle interrupts. */
5031 WREG32(RLC_CNTL, RLC_ENABLE);
5033 cik_enable_gui_idle_interrupt(rdev, true);
5039 * cik_rlc_resume - setup the RLC hw
5041 * @rdev: radeon_device pointer
5043 * Initialize the RLC registers, load the ucode,
5044 * and start the RLC (CIK).
5045 * Returns 0 for success, -EINVAL if the ucode is not available.
5047 static int cik_rlc_resume(struct radeon_device *rdev)
/* RLC firmware words are stored big-endian in the blob. */
5050 const __be32 *fw_data;
/* Pick the per-ASIC ucode size (dword count). */
5055 switch (rdev->family) {
5058 size = BONAIRE_RLC_UCODE_SIZE;
5061 size = KV_RLC_UCODE_SIZE;
5064 size = KB_RLC_UCODE_SIZE;
/* Disable CGCG/CGLS (low two bits) while reprogramming the RLC. */
5071 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5072 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
/* Load-balancing counters/params; constants are hw magic values. */
5080 WREG32(RLC_LB_CNTR_INIT, 0);
5081 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5083 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5084 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5085 WREG32(RLC_LB_PARAMS, 0x00600408);
5086 WREG32(RLC_LB_CNTL, 0x80000004);
5088 WREG32(RLC_MC_CNTL, 0);
5089 WREG32(RLC_UCODE_CNTL, 0);
/* Stream the big-endian ucode into the GPM ucode window, resetting the
 * address register before and after. */
5091 fw_data = (const __be32 *)rdev->rlc_fw->data;
5092 WREG32(RLC_GPM_UCODE_ADDR, 0);
5093 for (i = 0; i < size; i++)
5094 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5095 WREG32(RLC_GPM_UCODE_ADDR, 0);
5097 /* XXX - find out what chips support lbpw */
5098 cik_enable_lbpw(rdev, false);
5100 if (rdev->family == CHIP_BONAIRE)
5101 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5103 cik_rlc_start(rdev);
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for the GFX
 * block. The enable path halts the RLC, programs the serdes write masks
 * and control, restores the RLC, then sets CGCG_EN | CGLS_EN; the
 * disable path clears those bits after flushing CB_CGTT_SCLK_CTRL with
 * repeated reads. The final write is visibly guarded only when data
 * changed (guard line not visible here). */
5108 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5110 u32 data, orig, tmp, tmp2;
5112 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5114 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5115 cik_enable_gui_idle_interrupt(rdev, true);
5117 tmp = cik_halt_rlc(rdev);
5119 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5120 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5121 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5122 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5123 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5125 cik_update_rlc(rdev, tmp);
5127 data |= CGCG_EN | CGLS_EN;
5129 cik_enable_gui_idle_interrupt(rdev, false);
/* Dummy reads to flush pending CB clock-gating state. */
5131 RREG32(CB_CGTT_SCLK_CTRL);
5132 RREG32(CB_CGTT_SCLK_CTRL);
5133 RREG32(CB_CGTT_SCLK_CTRL);
5134 RREG32(CB_CGTT_SCLK_CTRL);
5136 data &= ~(CGCG_EN | CGLS_EN);
5140 WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including the optional CP/RLC memory light-sleep (MGLS/CP_LS) and
 * CGTS shader-memory gating sub-features, gated by rdev->cg_flags.
 * Both paths halt the RLC, reprogram the serdes override, and restore
 * the RLC. Several register writes appear guarded by "changed" checks
 * whose lines are not visible in this fragment. */
5144 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5146 u32 data, orig, tmp = 0;
5148 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5149 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5150 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
/* CP memory light sleep. */
5151 orig = data = RREG32(CP_MEM_SLP_CNTL);
5152 data |= CP_MEM_LS_EN;
5154 WREG32(CP_MEM_SLP_CNTL, data);
5158 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5161 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5163 tmp = cik_halt_rlc(rdev);
5165 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5166 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5167 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5168 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5169 WREG32(RLC_SERDES_WR_CTRL, data);
5171 cik_update_rlc(rdev, tmp);
5173 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
/* CGTS: shader-memory clock gating; 0x96 is the hw-recommended
 * on-monitor add value. */
5174 orig = data = RREG32(CGTS_SM_CTRL_REG);
5175 data &= ~SM_MODE_MASK;
5176 data |= SM_MODE(0x2);
5177 data |= SM_MODE_ENABLE;
5178 data &= ~CGTS_OVERRIDE;
5179 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5180 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5181 data &= ~CGTS_LS_OVERRIDE;
5182 data &= ~ON_MONITOR_ADD_MASK;
5183 data |= ON_MONITOR_ADD_EN;
5184 data |= ON_MONITOR_ADD(0x96);
5186 WREG32(CGTS_SM_CTRL_REG, data);
/* Disable path (else branch; brace lines not visible): undo the
 * overrides and light-sleep enables set above. */
5189 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5192 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5194 data = RREG32(RLC_MEM_SLP_CNTL);
5195 if (data & RLC_MEM_LS_EN) {
5196 data &= ~RLC_MEM_LS_EN;
5197 WREG32(RLC_MEM_SLP_CNTL, data);
5200 data = RREG32(CP_MEM_SLP_CNTL);
5201 if (data & CP_MEM_LS_EN) {
5202 data &= ~CP_MEM_LS_EN;
5203 WREG32(CP_MEM_SLP_CNTL, data);
5206 orig = data = RREG32(CGTS_SM_CTRL_REG);
5207 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5209 WREG32(CGTS_SM_CTRL_REG, data);
5211 tmp = cik_halt_rlc(rdev);
5213 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5214 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5215 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5216 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5217 WREG32(RLC_SERDES_WR_CTRL, data);
5219 cik_update_rlc(rdev, tmp);
5223 static const u32 mc_cg_registers[] =
/* Toggle memory-controller light sleep: set/clear MC_LS_ENABLE in each
 * register of the mc_cg_registers table (read-modify-write). */
5236 static void cik_enable_mc_ls(struct radeon_device *rdev,
5242 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5243 orig = data = RREG32(mc_cg_registers[i]);
5244 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5245 data |= MC_LS_ENABLE;
5247 data &= ~MC_LS_ENABLE;
5249 WREG32(mc_cg_registers[i], data);
/* Toggle memory-controller medium-grain clock gating: set/clear
 * MC_CG_ENABLE in each register of the mc_cg_registers table. */
5253 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5259 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5260 orig = data = RREG32(mc_cg_registers[i]);
5261 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5262 data |= MC_CG_ENABLE;
5264 data &= ~MC_CG_ENABLE;
5266 WREG32(mc_cg_registers[i], data);
/* Toggle SDMA medium-grain clock gating on both SDMA engines.
 * Enable writes the magic gating value 0x100 directly; disable does a
 * read-modify-write per engine (the modification lines are not visible
 * in this fragment). */
5270 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5275 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5276 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5277 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5279 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5282 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5284 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5287 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* Toggle SDMA memory light sleep on both SDMA engines via
 * SDMA0_POWER_CNTL read-modify-writes (the bit manipulation lines are
 * not visible in this fragment). */
5291 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5296 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5297 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5300 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5302 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5305 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Disable path (else branch; brace lines not visible). */
5307 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5310 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5312 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5315 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Toggle UVD medium-grain clock gating. UVD_CGC_MEM_CTRL lives in the
 * UVD context register space (RREG32_UVD_CTX/WREG32_UVD_CTX);
 * UVD_CGC_CTRL is a normal MMIO register. The bit manipulation lines
 * between each read and write are not visible in this fragment. */
5319 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5324 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5325 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5327 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5329 orig = data = RREG32(UVD_CGC_CTRL);
5332 WREG32(UVD_CGC_CTRL, data);
/* Disable path (else branch; brace lines not visible). */
5334 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5336 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5338 orig = data = RREG32(UVD_CGC_CTRL);
5341 WREG32(UVD_CGC_CTRL, data);
/* Toggle BIF (bus interface) memory light sleep bits in the PCIE_CNTL2
 * port register (read-modify-write through the PCIE port aperture). */
5345 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5350 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5352 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5353 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5354 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5356 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5357 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5360 WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Toggle HDP medium-grain clock gating. Note the bit is a *disable*
 * bit, so enable clears CLOCK_GATING_DIS and disable sets it. */
5363 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5368 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5370 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5371 data &= ~CLOCK_GATING_DIS;
5373 data |= CLOCK_GATING_DIS;
5376 WREG32(HDP_HOST_PATH_CNTL, data);
/* Toggle HDP memory light sleep via HDP_LS_ENABLE in HDP_MEM_POWER_LS
 * (read-modify-write). */
5379 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5384 orig = data = RREG32(HDP_MEM_POWER_LS);
5386 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5387 data |= HDP_LS_ENABLE;
5389 data &= ~HDP_LS_ENABLE;
5392 WREG32(HDP_MEM_POWER_LS, data);
/* Dispatch clock-gating enable/disable to each IP block selected by the
 * 'block' bitmask. For GFX, MGCG must be enabled before CGCG and
 * disabled in the reverse order; GUI-idle interrupts are masked around
 * the transition. */
5395 void cik_update_cg(struct radeon_device *rdev,
5396 u32 block, bool enable)
5399 if (block & RADEON_CG_BLOCK_GFX) {
5400 cik_enable_gui_idle_interrupt(rdev, false);
5401 /* order matters! */
5403 cik_enable_mgcg(rdev, true);
5404 cik_enable_cgcg(rdev, true);
5406 cik_enable_cgcg(rdev, false);
5407 cik_enable_mgcg(rdev, false);
5409 cik_enable_gui_idle_interrupt(rdev, true);
5412 if (block & RADEON_CG_BLOCK_MC) {
/* MC clock gating is only programmed on discrete parts, not APUs. */
5413 if (!(rdev->flags & RADEON_IS_IGP)) {
5414 cik_enable_mc_mgcg(rdev, enable);
5415 cik_enable_mc_ls(rdev, enable);
5419 if (block & RADEON_CG_BLOCK_SDMA) {
5420 cik_enable_sdma_mgcg(rdev, enable);
5421 cik_enable_sdma_mgls(rdev, enable);
5424 if (block & RADEON_CG_BLOCK_BIF) {
5425 cik_enable_bif_mgls(rdev, enable);
5428 if (block & RADEON_CG_BLOCK_UVD) {
5430 cik_enable_uvd_mgcg(rdev, enable);
5433 if (block & RADEON_CG_BLOCK_HDP) {
5434 cik_enable_hdp_mgcg(rdev, enable);
5435 cik_enable_hdp_ls(rdev, enable);
/* Enable clock gating at init: GFX first, then UVD internal CG setup
 * (shared with SI), then the remaining blocks in one call. */
5439 static void cik_init_cg(struct radeon_device *rdev)
5442 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5445 si_init_uvd_internal_cg(rdev);
5447 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5448 RADEON_CG_BLOCK_SDMA |
5449 RADEON_CG_BLOCK_BIF |
5450 RADEON_CG_BLOCK_UVD |
5451 RADEON_CG_BLOCK_HDP), true);
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): non-GFX blocks first, GFX last. */
5454 static void cik_fini_cg(struct radeon_device *rdev)
5456 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5457 RADEON_CG_BLOCK_SDMA |
5458 RADEON_CG_BLOCK_BIF |
5459 RADEON_CG_BLOCK_UVD |
5460 RADEON_CG_BLOCK_HDP), false);
5462 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL, gated by the
 * RLC_SMU_HS power-gating flag. */
5465 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5470 orig = data = RREG32(RLC_PG_CNTL);
5471 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5472 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5474 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5476 WREG32(RLC_PG_CNTL, data);
/* Toggle SMU clock slowdown on power-down in RLC_PG_CNTL; counterpart
 * of cik_enable_sck_slowdown_on_pu(). */
5479 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5484 orig = data = RREG32(RLC_PG_CNTL);
5485 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5486 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5488 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5490 WREG32(RLC_PG_CNTL, data);
/* Toggle CP power gating. DISABLE_CP_PG is a disable bit, so enabling
 * PG clears it and disabling PG sets it. */
5493 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5497 orig = data = RREG32(RLC_PG_CNTL);
5498 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5499 data &= ~DISABLE_CP_PG;
5501 data |= DISABLE_CP_PG;
5503 WREG32(RLC_PG_CNTL, data);
/* Toggle GDS power gating; same inverted-bit convention as
 * cik_enable_cp_pg(). */
5506 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5510 orig = data = RREG32(RLC_PG_CNTL);
5511 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5512 data &= ~DISABLE_GDS_PG;
5514 data |= DISABLE_GDS_PG;
5516 WREG32(RLC_PG_CNTL, data);
5519 #define CP_ME_TABLE_SIZE 96
5520 #define CP_ME_TABLE_OFFSET 2048
5521 #define CP_MEC_TABLE_OFFSET 4096
/* Fill the RLC CP power-gating table buffer with register data blocks
 * extracted from the CE, PFP, ME and MEC firmware images (one block per
 * micro-engine, CP_ME_TABLE_SIZE dwords each). Firmware words are
 * big-endian and converted on copy. */
5523 void cik_init_cp_pg_table(struct radeon_device *rdev)
5525 const __be32 *fw_data;
5526 volatile u32 *dst_ptr;
5527 int me, i, max_me = 4;
/* Kaveri uses a different engine count (assigned value not visible in
 * this fragment). */
5531 if (rdev->family == CHIP_KAVERI)
/* Nothing to do if the table buffer was never mapped. */
5534 if (rdev->rlc.cp_table_ptr == NULL)
5537 /* write the cp table buffer */
5538 dst_ptr = rdev->rlc.cp_table_ptr;
5539 for (me = 0; me < max_me; me++) {
/* Pick the firmware blob and table offset for this micro-engine:
 * 0 = CE, 1 = PFP, 2 = ME, otherwise MEC. */
5541 fw_data = (const __be32 *)rdev->ce_fw->data;
5542 table_offset = CP_ME_TABLE_OFFSET;
5543 } else if (me == 1) {
5544 fw_data = (const __be32 *)rdev->pfp_fw->data;
5545 table_offset = CP_ME_TABLE_OFFSET;
5546 } else if (me == 2) {
5547 fw_data = (const __be32 *)rdev->me_fw->data;
5548 table_offset = CP_ME_TABLE_OFFSET;
5550 fw_data = (const __be32 *)rdev->mec_fw->data;
5551 table_offset = CP_MEC_TABLE_OFFSET;
5554 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5555 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5557 bo_offset += CP_ME_TABLE_SIZE;
/* Toggle GFX coarse-grain power gating: set/clear GFX_PG_ENABLE in
 * RLC_PG_CNTL and the auto-PG enable in RLC_AUTO_PG_CTRL. The bit set
 * on enable in RLC_AUTO_PG_CTRL and the use of the final
 * DB_RENDER_CONTROL read are not visible in this fragment. */
5561 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5566 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5567 orig = data = RREG32(RLC_PG_CNTL);
5568 data |= GFX_PG_ENABLE;
5570 WREG32(RLC_PG_CNTL, data);
5572 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5575 WREG32(RLC_AUTO_PG_CTRL, data);
/* Disable path (else branch; brace lines not visible). */
5577 orig = data = RREG32(RLC_PG_CNTL);
5578 data &= ~GFX_PG_ENABLE;
5580 WREG32(RLC_PG_CNTL, data);
5582 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5583 data &= ~AUTO_PG_EN;
5585 WREG32(RLC_AUTO_PG_CTRL, data);
5587 data = RREG32(DB_RENDER_CONTROL);
/* Return the bitmap of active (not fused-off/disabled) compute units
 * for shader engine 'se' / shader array 'sh'. Reads the harvest config
 * for that SE/SH, then restores broadcast selection. The combination
 * of tmp and tmp1 between the reads and the return is not fully visible
 * in this fragment; the result is the complement of tmp masked to
 * max_cu_per_sh bits. */
5591 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5593 u32 mask = 0, tmp, tmp1;
5596 cik_select_se_sh(rdev, se, sh);
5597 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5598 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5599 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* Build a mask with one bit per possible CU in this SH. */
5606 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5611 return (~tmp) & mask;
/* Program the RLC always-on CU mask and the max power-gated CU count
 * from the per-SE/SH active-CU bitmaps. Packs one byte of CU bitmap
 * per SH into 'tmp' (i*16 + j*8 bit offset) and counts active CUs. */
5614 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5616 u32 i, j, k, active_cu_number = 0;
5617 u32 mask, counter, cu_bitmap;
5620 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5621 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5625 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5626 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5634 active_cu_number += counter;
5635 tmp |= (cu_bitmap << (i * 16 + j * 8));
5639 WREG32(RLC_PG_AO_CU_MASK, tmp);
/* Update only the MAX_PU_CU field, preserving the rest of the reg. */
5641 tmp = RREG32(RLC_MAX_PG_CU);
5642 tmp &= ~MAX_PU_CU_MASK;
5643 tmp |= MAX_PU_CU(active_cu_number);
5644 WREG32(RLC_MAX_PG_CU, tmp);
/* Toggle static per-CU medium-grain power gating in RLC_PG_CNTL,
 * gated by the GFX_SMG power-gating flag. */
5647 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5652 orig = data = RREG32(RLC_PG_CNTL);
5653 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5654 data |= STATIC_PER_CU_PG_ENABLE;
5656 data &= ~STATIC_PER_CU_PG_ENABLE;
5658 WREG32(RLC_PG_CNTL, data);
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL,
 * gated by the GFX_DMG power-gating flag. */
5661 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5666 orig = data = RREG32(RLC_PG_CNTL);
5667 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5668 data |= DYN_PER_CU_PG_ENABLE;
5670 data &= ~DYN_PER_CU_PG_ENABLE;
5672 WREG32(RLC_PG_CNTL, data);
5675 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5676 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* One-time GFX power-gating setup: publish the clear-state descriptor
 * and save/restore register list into RLC GPM scratch, program the
 * save/restore and CP table buffer addresses, and tune the PG delay /
 * auto-PG timing parameters. */
5678 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5683 if (rdev->rlc.cs_data) {
/* Descriptor = clear-state GPU address (hi, lo) + size, written at a
 * fixed scratch offset. */
5684 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5685 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5686 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5687 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
/* No clear-state data (else branch; brace lines not visible): zero the
 * three descriptor dwords instead. */
5689 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5690 for (i = 0; i < 3; i++)
5691 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5693 if (rdev->rlc.reg_list) {
5694 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5695 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5696 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
/* Bit set in RLC_PG_CNTL here is not visible in this fragment. */
5699 orig = data = RREG32(RLC_PG_CNTL);
5702 WREG32(RLC_PG_CNTL, data);
/* Buffer addresses are 256-byte aligned (>> 8). */
5704 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5705 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5707 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5708 data &= ~IDLE_POLL_COUNT_MASK;
5709 data |= IDLE_POLL_COUNT(0x60);
5710 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5713 WREG32(RLC_PG_DELAY, data);
5715 data = RREG32(RLC_PG_DELAY_2);
5718 WREG32(RLC_PG_DELAY_2, data);
/* Auto-PG: set the GRBM register save-group idle threshold to 0x700. */
5720 data = RREG32(RLC_AUTO_PG_CTRL);
5721 data &= ~GRBM_REG_SGIT_MASK;
5722 data |= GRBM_REG_SGIT(0x700);
5723 WREG32(RLC_AUTO_PG_CTRL, data);
/* Convenience wrapper: toggle all three GFX power-gating mechanisms
 * (coarse-grain, static per-CU, dynamic per-CU) together. */
5727 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5729 cik_enable_gfx_cgpg(rdev, enable);
5730 cik_enable_gfx_static_mgpg(rdev, enable);
5731 cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Compute the dword size of the clear-state buffer (CSB) that
 * cik_get_csb_buffer() will emit: preamble packets, context control,
 * one SET_CONTEXT_REG packet per SECT_CONTEXT extent (2 header dwords
 * plus its registers), the raster-config pair, and the trailing
 * clear-state/preamble-end packets. The fixed-count increments between
 * the visible lines are not shown in this fragment. Returns 0 when no
 * clear-state data is attached. */
5734 u32 cik_get_csb_size(struct radeon_device *rdev)
5737 const struct cs_section_def *sect = NULL;
5738 const struct cs_extent_def *ext = NULL;
5740 if (rdev->rlc.cs_data == NULL)
5743 /* begin clear state */
5745 /* context control state */
5748 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5749 for (ext = sect->section; ext->extent != NULL; ++ext) {
5750 if (sect->id == SECT_CONTEXT)
5751 count += 2 + ext->reg_count;
5756 /* pa_sc_raster_config/pa_sc_raster_config1 */
5758 /* end clear state */
/* Emit the clear-state buffer contents into 'buffer'. Layout mirrors
 * cik_get_csb_size(): preamble-begin, context control, the SECT_CONTEXT
 * register extents, the per-family PA_SC_RASTER_CONFIG pair, then
 * preamble-end and CLEAR_STATE. Caller must size 'buffer' with
 * cik_get_csb_size(). */
5766 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5769 const struct cs_section_def *sect = NULL;
5770 const struct cs_extent_def *ext = NULL;
5772 if (rdev->rlc.cs_data == NULL)
5777 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5778 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5780 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5781 buffer[count++] = 0x80000000;
5782 buffer[count++] = 0x80000000;
5784 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5785 for (ext = sect->section; ext->extent != NULL; ++ext) {
5786 if (sect->id == SECT_CONTEXT) {
5787 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
/* Context register packets take an offset from the 0xa000 base. */
5788 buffer[count++] = ext->reg_index - 0xa000;
5789 for (i = 0; i < ext->reg_count; i++)
5790 buffer[count++] = ext->extent[i];
5797 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5798 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
/* Per-family raster config values; the case labels between the
 * constants are not visible in this fragment (0x16000012 matches the
 * Bonaire value in the upstream driver — confirm against full source). */
5799 switch (rdev->family) {
5801 buffer[count++] = 0x16000012;
5802 buffer[count++] = 0x00000000;
5805 buffer[count++] = 0x00000000; /* XXX */
5806 buffer[count++] = 0x00000000;
5809 buffer[count++] = 0x00000000; /* XXX */
5810 buffer[count++] = 0x00000000;
5813 buffer[count++] = 0x00000000;
5814 buffer[count++] = 0x00000000;
5818 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5819 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5821 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5822 buffer[count++] = 0;
/* Enable power gating at init if any PG feature is advertised:
 * SMU clock slowdown always, CP/GDS PG and the gfx CGPG setup only when
 * GFX_PG is supported, then the AO CU mask and the gfx PG toggles. */
5825 static void cik_init_pg(struct radeon_device *rdev)
5827 if (rdev->pg_flags) {
5828 cik_enable_sck_slowdown_on_pu(rdev, true);
5829 cik_enable_sck_slowdown_on_pd(rdev, true);
5830 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5831 cik_init_gfx_cgpg(rdev);
5832 cik_enable_cp_pg(rdev, true);
5833 cik_enable_gds_pg(rdev, true);
5835 cik_init_ao_cu_mask(rdev);
5836 cik_update_gfx_pg(rdev, true);
/* Disable power gating at teardown, reversing cik_init_pg(): gfx PG
 * first, then CP/GDS PG when GFX_PG was in use. */
5840 static void cik_fini_pg(struct radeon_device *rdev)
5842 if (rdev->pg_flags) {
5843 cik_update_gfx_pg(rdev, false);
5844 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5845 cik_enable_cp_pg(rdev, false);
5846 cik_enable_gds_pg(rdev, false);
5853 * Starting with r6xx, interrupts are handled via a ring buffer.
5854 * Ring buffers are areas of GPU accessible memory that the GPU
5855 * writes interrupt vectors into and the host reads vectors out of.
5856 * There is a rptr (read pointer) that determines where the
5857 * host is currently reading, and a wptr (write pointer)
5858 * which determines where the GPU has written. When the
5859 * pointers are equal, the ring is idle. When the GPU
5860 * writes vectors to the ring buffer, it increments the
5861 * wptr. When there is an interrupt, the host then starts
5862 * fetching commands and processing them until the pointers are
5863 * equal again at which point it updates the rptr.
5867 * cik_enable_interrupts - Enable the interrupt ring buffer
5869 * @rdev: radeon_device pointer
5871 * Enable the interrupt ring buffer (CIK).
5873 static void cik_enable_interrupts(struct radeon_device *rdev)
5875 u32 ih_cntl = RREG32(IH_CNTL);
5876 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
/* Turn on both the IH engine and its ring buffer, then record the
 * software-visible enabled state. */
5878 ih_cntl |= ENABLE_INTR;
5879 ih_rb_cntl |= IH_RB_ENABLE;
5880 WREG32(IH_CNTL, ih_cntl);
5881 WREG32(IH_RB_CNTL, ih_rb_cntl);
5882 rdev->ih.enabled = true;
5886 * cik_disable_interrupts - Disable the interrupt ring buffer
5888 * @rdev: radeon_device pointer
5890 * Disable the interrupt ring buffer (CIK).
5892 static void cik_disable_interrupts(struct radeon_device *rdev)
5894 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5895 u32 ih_cntl = RREG32(IH_CNTL);
/* Stop the ring buffer and the IH engine, reset both ring pointers,
 * and clear the software-visible enabled state. */
5897 ih_rb_cntl &= ~IH_RB_ENABLE;
5898 ih_cntl &= ~ENABLE_INTR;
5899 WREG32(IH_RB_CNTL, ih_rb_cntl);
5900 WREG32(IH_CNTL, ih_cntl);
5901 /* set rptr, wptr to 0 */
5902 WREG32(IH_RB_RPTR, 0);
5903 WREG32(IH_RB_WPTR, 0);
5904 rdev->ih.enabled = false;
5909 * cik_disable_interrupt_state - Disable all interrupt sources
5911 * @rdev: radeon_device pointer
5913 * Clear all interrupt enable bits used by the driver (CIK).
5915 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* GFX ring: keep only the context busy/empty bits, clearing every
 * other interrupt enable. */
5920 tmp = RREG32(CP_INT_CNTL_RING0) &
5921 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5922 WREG32(CP_INT_CNTL_RING0, tmp);
/* Both SDMA engines: clear the trap enable. */
5924 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5925 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5926 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5927 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5928 /* compute queues */
5929 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5930 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5931 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5932 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5933 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5934 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5935 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5936 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5938 WREG32(GRBM_INT_CNTL, 0);
5939 /* vline/vblank, etc. */
/* Display interrupts: only touch CRTC 2-5 registers when those CRTCs
 * exist on this part. */
5940 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5941 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5942 if (rdev->num_crtc >= 4) {
5943 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5944 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5946 if (rdev->num_crtc >= 6) {
5947 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5948 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5952 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5954 /* digital hotplug */
/* HPD pads: preserve only the polarity bit, clearing the enables. */
5955 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5956 WREG32(DC_HPD1_INT_CONTROL, tmp);
5957 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5958 WREG32(DC_HPD2_INT_CONTROL, tmp);
5959 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 WREG32(DC_HPD3_INT_CONTROL, tmp);
5961 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 WREG32(DC_HPD4_INT_CONTROL, tmp);
5963 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 WREG32(DC_HPD5_INT_CONTROL, tmp);
5965 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 WREG32(DC_HPD6_INT_CONTROL, tmp);
5971 * cik_irq_init - init and enable the interrupt ring
5973 * @rdev: radeon_device pointer
5975 * Allocate a ring buffer for the interrupt controller,
5976 * enable the RLC, disable interrupts, enable the IH
5977 * ring buffer and enable it (CIK).
5978 * Called at device load and resume.
5979 * Returns 0 for success, errors for failure.
5981 static int cik_irq_init(struct radeon_device *rdev)
5985 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5988 ret = r600_ih_ring_alloc(rdev);
5993 cik_disable_interrupts(rdev);
/* The RLC must be running before the IH can deliver interrupts; on
 * failure, tear down the IH ring allocated above. */
5996 ret = cik_rlc_resume(rdev);
5998 r600_ih_ring_fini(rdev);
6002 /* setup interrupt control */
6003 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6004 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6005 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6006 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6007 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6009 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6010 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6011 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6012 WREG32(INTERRUPT_CNTL, interrupt_cntl);
/* Ring base is 256-byte aligned; size field is log2 of dword count. */
6014 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6015 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6017 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6018 IH_WPTR_OVERFLOW_CLEAR |
6021 if (rdev->wb.enabled)
6022 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6024 /* set the writeback address whether it's enabled or not */
6025 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6026 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6028 WREG32(IH_RB_CNTL, ih_rb_cntl);
6030 /* set rptr, wptr to 0 */
6031 WREG32(IH_RB_RPTR, 0);
6032 WREG32(IH_RB_WPTR, 0);
6034 /* Default settings for IH_CNTL (disabled at first) */
6035 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6036 /* RPTR_REARM only works if msi's are enabled */
6037 if (rdev->msi_enabled)
6038 ih_cntl |= RPTR_REARM;
6039 WREG32(IH_CNTL, ih_cntl);
6041 /* force the active interrupt state to all disabled */
6042 cik_disable_interrupt_state(rdev);
/* Bus mastering must be on for MSI/interrupt delivery. */
6044 pci_set_master(rdev->pdev);
6047 cik_enable_interrupts(rdev);
6053 * cik_irq_set - enable/disable interrupt sources
6055 * @rdev: radeon_device pointer
6057 * Enable interrupt sources on the GPU (vblanks, hpd,
6059 * Returns 0 for success, errors for failure.
6061 int cik_irq_set(struct radeon_device *rdev)
/* Build the desired interrupt-enable state in locals first, then commit
 * all register writes at the end so the hw sees one consistent update. */
6064 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6065 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6066 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6067 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6068 u32 grbm_int_cntl = 0;
6069 u32 dma_cntl, dma_cntl1;
6072 if (!rdev->irq.installed) {
6073 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6076 /* don't enable anything if the ih is disabled */
6077 if (!rdev->ih.enabled) {
6078 cik_disable_interrupts(rdev);
6079 /* force the active interrupt state to all disabled */
6080 cik_disable_interrupt_state(rdev);
/* Read current register state and mask off the bits we manage below. */
6084 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6085 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6086 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6088 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6089 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6090 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6091 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6092 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6093 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6095 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6096 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6098 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6099 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6100 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6101 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6102 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6103 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6104 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6105 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
/* APUs (IGP) expose the thermal interrupt via a different SMC register. */
6107 if (rdev->flags & RADEON_IS_IGP)
6108 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6109 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6111 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6112 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6114 /* enable CP interrupts on all rings */
6115 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6116 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6117 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6119 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6120 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6121 DRM_DEBUG("si_irq_set: sw int cp1\n");
6122 if (ring->me == 1) {
6123 switch (ring->pipe) {
6125 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6128 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6131 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* fix: pipe 3 must enable cp_m1p3, not cp_m1p2 (copy/paste bug) */
6134 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6137 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6140 } else if (ring->me == 2) {
6141 switch (ring->pipe) {
6143 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6146 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6149 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* fix: pipe 3 must enable cp_m2p3, not cp_m2p2 (copy/paste bug) */
6152 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6155 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6159 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6162 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6163 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6164 DRM_DEBUG("si_irq_set: sw int cp2\n");
6165 if (ring->me == 1) {
6166 switch (ring->pipe) {
6168 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6171 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6174 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* fix: pipe 3 must enable cp_m1p3, not cp_m1p2 (copy/paste bug) */
6177 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6180 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6183 } else if (ring->me == 2) {
6184 switch (ring->pipe) {
6186 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6189 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6192 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* fix: pipe 3 must enable cp_m2p3, not cp_m2p2 (copy/paste bug) */
6195 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6198 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6202 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6206 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6207 DRM_DEBUG("cik_irq_set: sw int dma\n");
6208 dma_cntl |= TRAP_ENABLE;
6211 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6212 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6213 dma_cntl1 |= TRAP_ENABLE;
/* Per-crtc vblank interrupts: enabled either for DRM vblank clients
 * or for pending page flips. */
6216 if (rdev->irq.crtc_vblank_int[0] ||
6217 atomic_read(&rdev->irq.pflip[0])) {
6218 DRM_DEBUG("cik_irq_set: vblank 0\n");
6219 crtc1 |= VBLANK_INTERRUPT_MASK;
6221 if (rdev->irq.crtc_vblank_int[1] ||
6222 atomic_read(&rdev->irq.pflip[1])) {
6223 DRM_DEBUG("cik_irq_set: vblank 1\n");
6224 crtc2 |= VBLANK_INTERRUPT_MASK;
6226 if (rdev->irq.crtc_vblank_int[2] ||
6227 atomic_read(&rdev->irq.pflip[2])) {
6228 DRM_DEBUG("cik_irq_set: vblank 2\n");
6229 crtc3 |= VBLANK_INTERRUPT_MASK;
6231 if (rdev->irq.crtc_vblank_int[3] ||
6232 atomic_read(&rdev->irq.pflip[3])) {
6233 DRM_DEBUG("cik_irq_set: vblank 3\n");
6234 crtc4 |= VBLANK_INTERRUPT_MASK;
6236 if (rdev->irq.crtc_vblank_int[4] ||
6237 atomic_read(&rdev->irq.pflip[4])) {
6238 DRM_DEBUG("cik_irq_set: vblank 4\n");
6239 crtc5 |= VBLANK_INTERRUPT_MASK;
6241 if (rdev->irq.crtc_vblank_int[5] ||
6242 atomic_read(&rdev->irq.pflip[5])) {
6243 DRM_DEBUG("cik_irq_set: vblank 5\n");
6244 crtc6 |= VBLANK_INTERRUPT_MASK;
6246 if (rdev->irq.hpd[0]) {
6247 DRM_DEBUG("cik_irq_set: hpd 1\n");
6248 hpd1 |= DC_HPDx_INT_EN;
6250 if (rdev->irq.hpd[1]) {
6251 DRM_DEBUG("cik_irq_set: hpd 2\n");
6252 hpd2 |= DC_HPDx_INT_EN;
6254 if (rdev->irq.hpd[2]) {
6255 DRM_DEBUG("cik_irq_set: hpd 3\n");
6256 hpd3 |= DC_HPDx_INT_EN;
6258 if (rdev->irq.hpd[3]) {
6259 DRM_DEBUG("cik_irq_set: hpd 4\n");
6260 hpd4 |= DC_HPDx_INT_EN;
6262 if (rdev->irq.hpd[4]) {
6263 DRM_DEBUG("cik_irq_set: hpd 5\n");
6264 hpd5 |= DC_HPDx_INT_EN;
6266 if (rdev->irq.hpd[5]) {
6267 DRM_DEBUG("cik_irq_set: hpd 6\n");
6268 hpd6 |= DC_HPDx_INT_EN;
6271 if (rdev->irq.dpm_thermal) {
6272 DRM_DEBUG("dpm thermal\n");
6273 if (rdev->flags & RADEON_IS_IGP)
6274 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6276 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
/* Commit the computed state to the hardware. */
6279 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6281 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6282 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6284 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6285 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6286 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6287 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6288 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6289 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6290 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6291 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6293 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6295 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6296 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6297 if (rdev->num_crtc >= 4) {
6298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6301 if (rdev->num_crtc >= 6) {
6302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6306 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6307 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6308 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6309 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6310 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6311 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6313 if (rdev->flags & RADEON_IS_IGP)
6314 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6316 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6322 * cik_irq_ack - ack interrupt sources
6324 * @rdev: radeon_device pointer
6326 * Ack interrupt sources on the GPU (vblanks, hpd,
6327 * etc.) (CIK). Certain interrupts sources are sw
6328 * generated and do not require an explicit ack.
6330 static inline void cik_irq_ack(struct radeon_device *rdev)
/* Snapshot all display interrupt status registers, then ack (write-1-clear
 * or RMW) every source that is latched. */
6334 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6335 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6336 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6337 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6338 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6339 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6340 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6342 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6343 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6344 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6345 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6346 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6348 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6351 if (rdev->num_crtc >= 4) {
6352 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6353 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6354 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6355 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6356 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6358 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6362 if (rdev->num_crtc >= 6) {
6363 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6364 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6365 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6366 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6367 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6369 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6373 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6374 tmp = RREG32(DC_HPD1_INT_CONTROL);
6375 tmp |= DC_HPDx_INT_ACK;
6376 WREG32(DC_HPD1_INT_CONTROL, tmp);
6378 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6379 tmp = RREG32(DC_HPD2_INT_CONTROL);
6380 tmp |= DC_HPDx_INT_ACK;
6381 WREG32(DC_HPD2_INT_CONTROL, tmp);
6383 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6384 tmp = RREG32(DC_HPD3_INT_CONTROL);
6385 tmp |= DC_HPDx_INT_ACK;
6386 WREG32(DC_HPD3_INT_CONTROL, tmp);
6388 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6389 tmp = RREG32(DC_HPD4_INT_CONTROL);
6390 tmp |= DC_HPDx_INT_ACK;
6391 WREG32(DC_HPD4_INT_CONTROL, tmp);
6393 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6394 tmp = RREG32(DC_HPD5_INT_CONTROL);
6395 tmp |= DC_HPDx_INT_ACK;
6396 WREG32(DC_HPD5_INT_CONTROL, tmp);
6398 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
/* fix: the RMW for HPD6 must read DC_HPD6_INT_CONTROL; reading the HPD5
 * register here clobbered HPD6's other control bits on the write below. */
6399 tmp = RREG32(DC_HPD6_INT_CONTROL);
6400 tmp |= DC_HPDx_INT_ACK;
6401 WREG32(DC_HPD6_INT_CONTROL, tmp);
6406 * cik_irq_disable - disable interrupts
6408 * @rdev: radeon_device pointer
6410 * Disable interrupts on the hw (CIK).
6412 static void cik_irq_disable(struct radeon_device *rdev)
/* Stop the IH ring so no new interrupts are latched... */
6414 cik_disable_interrupts(rdev);
6415 /* Wait and acknowledge irq */
/* ...then mask every interrupt source so nothing can re-assert. */
6418 cik_disable_interrupt_state(rdev);
6422 * cik_irq_suspend - disable interrupts for suspend
6424 * @rdev: radeon_device pointer
6426 * Disable interrupts and stop the RLC (CIK).
6429 static void cik_irq_suspend(struct radeon_device *rdev)
/* Disable and mask all interrupts before the chip is suspended. */
6431 cik_irq_disable(rdev);
6436 * cik_irq_fini - tear down interrupt support
6438 * @rdev: radeon_device pointer
6440 * Disable interrupts on the hw and free the IH ring
6442 * Used for driver unload.
6444 static void cik_irq_fini(struct radeon_device *rdev)
/* Quiesce interrupt delivery first, then free the IH ring buffer. */
6446 cik_irq_suspend(rdev);
6447 r600_ih_ring_fini(rdev);
6451 * cik_get_ih_wptr - get the IH ring buffer wptr
6453 * @rdev: radeon_device pointer
6455 * Get the IH ring buffer wptr from either the register
6456 * or the writeback memory buffer (CIK). Also check for
6457 * ring buffer overflow and deal with it.
6458 * Used by cik_irq_process().
6459 * Returns the value of the wptr.
6461 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* Prefer the writeback copy of the wptr; fall back to the MMIO register. */
6465 if (rdev->wb.enabled)
6466 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6468 wptr = RREG32(IH_RB_WPTR);
6470 if (wptr & RB_OVERFLOW) {
6471 /* When a ring buffer overflow happen start parsing interrupt
6472 * from the last not overwritten vector (wptr + 16). Hopefully
6473 * this should allow us to catchup.
/* fix: log the same masked value we assign to rptr below — the original
 * used '+' instead of '&', printing a meaningless number. */
6475 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6476 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6477 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6478 tmp = RREG32(IH_RB_CNTL);
6479 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6480 WREG32(IH_RB_CNTL, tmp);
6482 return (wptr & rdev->ih.ptr_mask);
6486 * Each IV ring entry is 128 bits:
6487 * [7:0] - interrupt source id
6489 * [59:32] - interrupt source data
6490 * [63:60] - reserved
6493 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6494 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6495 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6496 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6497 * PIPE_ID - ME0 0=3D
6498 * - ME1&2 compute dispatcher (4 pipes each)
6500 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6501 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6502 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6505 * [127:96] - reserved
6508 * cik_irq_process - interrupt handler
6510 * @rdev: radeon_device pointer
6512 * Interrupt hander (CIK). Walk the IH ring,
6513 * ack interrupts and schedule work to handle
6515 * Returns irq process return code.
6517 int cik_irq_process(struct radeon_device *rdev)
6519 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6520 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6523 u32 src_id, src_data, ring_id;
6524 u8 me_id, pipe_id, queue_id;
6526 bool queue_hotplug = false;
6527 bool queue_reset = false;
6528 u32 addr, status, mc_client;
6529 bool queue_thermal = false;
6531 if (!rdev->ih.enabled || rdev->shutdown)
6534 wptr = cik_get_ih_wptr(rdev);
6537 /* is somebody else already processing irqs? */
6538 if (atomic_xchg(&rdev->ih.lock, 1))
6541 rptr = rdev->ih.rptr;
6542 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6544 /* Order reading of wptr vs. reading of IH ring data */
6547 /* display interrupts */
/* Walk the IH ring one 128-bit vector at a time until we catch up. */
6550 while (rptr != wptr) {
6551 /* wptr/rptr are in bytes! */
6552 ring_index = rptr / 4;
6553 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6554 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6555 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6558 case 1: /* D1 vblank/vline */
6560 case 0: /* D1 vblank */
6561 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6562 if (rdev->irq.crtc_vblank_int[0]) {
6563 drm_handle_vblank(rdev->ddev, 0);
6564 rdev->pm.vblank_sync = true;
6565 wake_up(&rdev->irq.vblank_queue);
6567 if (atomic_read(&rdev->irq.pflip[0]))
6568 radeon_crtc_handle_flip(rdev, 0);
6569 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6570 DRM_DEBUG("IH: D1 vblank\n");
6573 case 1: /* D1 vline */
6574 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6575 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6576 DRM_DEBUG("IH: D1 vline\n");
6580 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6584 case 2: /* D2 vblank/vline */
6586 case 0: /* D2 vblank */
6587 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6588 if (rdev->irq.crtc_vblank_int[1]) {
6589 drm_handle_vblank(rdev->ddev, 1);
6590 rdev->pm.vblank_sync = true;
6591 wake_up(&rdev->irq.vblank_queue);
6593 if (atomic_read(&rdev->irq.pflip[1]))
6594 radeon_crtc_handle_flip(rdev, 1);
6595 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6596 DRM_DEBUG("IH: D2 vblank\n");
6599 case 1: /* D2 vline */
6600 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6601 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6602 DRM_DEBUG("IH: D2 vline\n");
6606 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6610 case 3: /* D3 vblank/vline */
6612 case 0: /* D3 vblank */
6613 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6614 if (rdev->irq.crtc_vblank_int[2]) {
6615 drm_handle_vblank(rdev->ddev, 2);
6616 rdev->pm.vblank_sync = true;
6617 wake_up(&rdev->irq.vblank_queue);
6619 if (atomic_read(&rdev->irq.pflip[2]))
6620 radeon_crtc_handle_flip(rdev, 2);
6621 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6622 DRM_DEBUG("IH: D3 vblank\n");
6625 case 1: /* D3 vline */
6626 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6627 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6628 DRM_DEBUG("IH: D3 vline\n");
6632 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6636 case 4: /* D4 vblank/vline */
6638 case 0: /* D4 vblank */
6639 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6640 if (rdev->irq.crtc_vblank_int[3]) {
6641 drm_handle_vblank(rdev->ddev, 3);
6642 rdev->pm.vblank_sync = true;
6643 wake_up(&rdev->irq.vblank_queue);
6645 if (atomic_read(&rdev->irq.pflip[3]))
6646 radeon_crtc_handle_flip(rdev, 3);
6647 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6648 DRM_DEBUG("IH: D4 vblank\n");
6651 case 1: /* D4 vline */
6652 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6653 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6654 DRM_DEBUG("IH: D4 vline\n");
6658 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6662 case 5: /* D5 vblank/vline */
6664 case 0: /* D5 vblank */
6665 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6666 if (rdev->irq.crtc_vblank_int[4]) {
6667 drm_handle_vblank(rdev->ddev, 4);
6668 rdev->pm.vblank_sync = true;
6669 wake_up(&rdev->irq.vblank_queue);
6671 if (atomic_read(&rdev->irq.pflip[4]))
6672 radeon_crtc_handle_flip(rdev, 4);
6673 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6674 DRM_DEBUG("IH: D5 vblank\n");
6677 case 1: /* D5 vline */
6678 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6679 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6680 DRM_DEBUG("IH: D5 vline\n");
6684 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6688 case 6: /* D6 vblank/vline */
6690 case 0: /* D6 vblank */
6691 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6692 if (rdev->irq.crtc_vblank_int[5]) {
6693 drm_handle_vblank(rdev->ddev, 5);
6694 rdev->pm.vblank_sync = true;
6695 wake_up(&rdev->irq.vblank_queue);
6697 if (atomic_read(&rdev->irq.pflip[5]))
6698 radeon_crtc_handle_flip(rdev, 5);
6699 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6700 DRM_DEBUG("IH: D6 vblank\n");
6703 case 1: /* D6 vline */
6704 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6705 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6706 DRM_DEBUG("IH: D6 vline\n");
6710 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6714 case 42: /* HPD hotplug */
6717 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6718 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6719 queue_hotplug = true;
6720 DRM_DEBUG("IH: HPD1\n");
6724 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6725 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6726 queue_hotplug = true;
6727 DRM_DEBUG("IH: HPD2\n");
6731 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6732 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6733 queue_hotplug = true;
6734 DRM_DEBUG("IH: HPD3\n");
6738 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6739 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6740 queue_hotplug = true;
6741 DRM_DEBUG("IH: HPD4\n");
6745 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6746 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6747 queue_hotplug = true;
6748 DRM_DEBUG("IH: HPD5\n");
6752 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6753 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6754 queue_hotplug = true;
6755 DRM_DEBUG("IH: HPD6\n");
6759 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6764 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6765 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6769 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6770 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6771 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6772 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6773 dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6775 dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6777 cik_vm_decode_fault(rdev, status, addr, mc_client);
6778 /* reset addr and status */
6779 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6781 case 176: /* GFX RB CP_INT */
6782 case 177: /* GFX IB CP_INT */
6783 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6785 case 181: /* CP EOP event */
6786 DRM_DEBUG("IH: CP EOP\n");
6787 /* XXX check the bitfield order! */
6788 me_id = (ring_id & 0x60) >> 5;
6789 pipe_id = (ring_id & 0x18) >> 3;
6790 queue_id = (ring_id & 0x7) >> 0;
6793 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* fix: use logical && on the boolean comparisons, not bitwise & */
6797 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6798 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6799 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6800 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6804 case 184: /* CP Privileged reg access */
6805 DRM_ERROR("Illegal register access in command stream\n");
6806 /* XXX check the bitfield order! */
6807 me_id = (ring_id & 0x60) >> 5;
6808 pipe_id = (ring_id & 0x18) >> 3;
6809 queue_id = (ring_id & 0x7) >> 0;
6812 /* This results in a full GPU reset, but all we need to do is soft
6813 * reset the CP for gfx
6827 case 185: /* CP Privileged inst */
6828 DRM_ERROR("Illegal instruction in command stream\n");
6829 /* XXX check the bitfield order! */
6830 me_id = (ring_id & 0x60) >> 5;
6831 pipe_id = (ring_id & 0x18) >> 3;
6832 queue_id = (ring_id & 0x7) >> 0;
6835 /* This results in a full GPU reset, but all we need to do is soft
6836 * reset the CP for gfx
6850 case 224: /* SDMA trap event */
6851 /* XXX check the bitfield order! */
6852 me_id = (ring_id & 0x3) >> 0;
6853 queue_id = (ring_id & 0xc) >> 2;
6854 DRM_DEBUG("IH: SDMA trap\n");
6859 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6872 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6884 case 230: /* thermal low to high */
6885 DRM_DEBUG("IH: thermal low to high\n");
6886 rdev->pm.dpm.thermal.high_to_low = false;
6887 queue_thermal = true;
6889 case 231: /* thermal high to low */
6890 DRM_DEBUG("IH: thermal high to low\n");
6891 rdev->pm.dpm.thermal.high_to_low = true;
6892 queue_thermal = true;
6894 case 233: /* GUI IDLE */
6895 DRM_DEBUG("IH: GUI idle\n");
6897 case 241: /* SDMA Privileged inst */
6898 case 247: /* SDMA Privileged inst */
6899 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6900 /* XXX check the bitfield order! */
6901 me_id = (ring_id & 0x3) >> 0;
6902 queue_id = (ring_id & 0xc) >> 2;
6937 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6941 /* wptr/rptr are in bytes! */
6943 rptr &= rdev->ih.ptr_mask;
/* Deferred work: hotplug, GPU reset and thermal run from process context. */
6946 schedule_work(&rdev->hotplug_work);
6948 schedule_work(&rdev->reset_work);
6950 schedule_work(&rdev->pm.dpm.thermal.work);
6951 rdev->ih.rptr = rptr;
6952 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6953 atomic_set(&rdev->ih.lock, 0);
6955 /* make sure wptr hasn't changed while processing */
6956 wptr = cik_get_ih_wptr(rdev);
6964 * startup/shutdown callbacks
6967 * cik_startup - program the asic to a functional state
6969 * @rdev: radeon_device pointer
6971 * Programs the asic to a functional state (CIK).
6972 * Called by cik_init() and cik_resume().
6973 * Returns 0 for success, error for failure.
6975 static int cik_startup(struct radeon_device *rdev)
6977 struct radeon_ring *ring;
6980 /* enable pcie gen2/3 link */
6981 cik_pcie_gen3_enable(rdev);
6983 cik_program_aspm(rdev);
6985 /* scratch needs to be initialized before MC */
6986 r = r600_vram_scratch_init(rdev);
6990 cik_mc_program(rdev);
/* Load ucode: APUs (IGP) need no MC/SMC firmware; dGPUs do. */
6992 if (rdev->flags & RADEON_IS_IGP) {
6993 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6994 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6995 r = cik_init_microcode(rdev);
6997 DRM_ERROR("Failed to load firmware!\n");
7002 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7003 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7005 r = cik_init_microcode(rdev);
7007 DRM_ERROR("Failed to load firmware!\n");
7012 r = ci_mc_load_microcode(rdev);
7014 DRM_ERROR("Failed to load MC firmware!\n");
7019 r = cik_pcie_gart_enable(rdev);
7024 /* allocate rlc buffers */
7025 if (rdev->flags & RADEON_IS_IGP) {
7026 if (rdev->family == CHIP_KAVERI) {
7027 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7028 rdev->rlc.reg_list_size =
7029 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7031 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7032 rdev->rlc.reg_list_size =
7033 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7036 rdev->rlc.cs_data = ci_cs_data;
7037 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7038 r = sumo_rlc_init(rdev);
7040 DRM_ERROR("Failed to init rlc BOs!\n");
7044 /* allocate wb buffer */
7045 r = radeon_wb_init(rdev);
7049 /* allocate mec buffers */
7050 r = cik_mec_init(rdev);
7052 DRM_ERROR("Failed to init MEC BOs!\n");
/* Start fence tracking for every ring before the rings themselves. */
7056 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7058 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7062 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7064 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7068 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7070 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7074 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7076 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7080 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7082 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD is optional: on failure the UVD ring size is zeroed and skipped. */
7086 r = radeon_uvd_resume(rdev);
7088 r = uvd_v4_2_resume(rdev);
7090 r = radeon_fence_driver_start_ring(rdev,
7091 R600_RING_TYPE_UVD_INDEX);
7093 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7097 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7100 if (!rdev->irq.installed) {
7101 r = radeon_irq_kms_init(rdev);
7106 r = cik_irq_init(rdev);
7108 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7109 radeon_irq_kms_fini(rdev);
7114 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7115 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7116 CP_RB0_RPTR, CP_RB0_WPTR,
7121 /* set up the compute queues */
7122 /* type-2 packets are deprecated on MEC, use type-3 instead */
7123 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7124 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7125 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7126 PACKET3(PACKET3_NOP, 0x3FFF));
7129 ring->me = 1; /* first MEC */
7130 ring->pipe = 0; /* first pipe */
7131 ring->queue = 0; /* first queue */
7132 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7134 /* type-2 packets are deprecated on MEC, use type-3 instead */
7135 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7136 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7137 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7138 PACKET3(PACKET3_NOP, 0x3FFF));
7141 /* dGPU only have 1 MEC */
7142 ring->me = 1; /* first MEC */
7143 ring->pipe = 0; /* first pipe */
7144 ring->queue = 1; /* second queue */
7145 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7147 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7148 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7149 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7150 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7151 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7155 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7156 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7157 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7158 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7159 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7163 r = cik_cp_resume(rdev);
7167 r = cik_sdma_resume(rdev);
7171 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7172 if (ring->ring_size) {
7173 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7174 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7177 r = uvd_v1_0_init(rdev);
7179 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7182 r = radeon_ib_pool_init(rdev);
7184 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7188 r = radeon_vm_manager_init(rdev);
7190 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7194 r = dce6_audio_init(rdev);
7202 * cik_resume - resume the asic to a functional state
7204 * @rdev: radeon_device pointer
7206 * Programs the asic to a functional state (CIK).
7208 * Returns 0 for success, error for failure.
7210 int cik_resume(struct radeon_device *rdev)
/* Re-post the card via atombios, restore golden register settings, then
 * re-run the full startup sequence. */
7215 atom_asic_init(rdev->mode_info.atom_context);
7217 /* init golden registers */
7218 cik_init_golden_registers(rdev);
/* Mark acceleration usable before startup; cleared again on failure. */
7220 rdev->accel_working = true;
7221 r = cik_startup(rdev);
7223 DRM_ERROR("cik startup failed on resume\n");
7224 rdev->accel_working = false;
7233 * cik_suspend - suspend the asic
7235 * @rdev: radeon_device pointer
7237 * Bring the chip into a state suitable for suspend (CIK).
7238 * Called at suspend.
7239 * Returns 0 for success.
7241 int cik_suspend(struct radeon_device *rdev)
/* Tear down in reverse dependency order: audio and VM manager first,
 * then stop the engines, then IRQs, writeback and GART last. */
7243 dce6_audio_fini(rdev);
7244 radeon_vm_manager_fini(rdev);
7245 cik_cp_enable(rdev, false);
7246 cik_sdma_enable(rdev, false);
7247 uvd_v1_0_fini(rdev);
7248 radeon_uvd_suspend(rdev);
7251 cik_irq_suspend(rdev);
7252 radeon_wb_disable(rdev);
7253 cik_pcie_gart_disable(rdev);
7257 /* Plan is to move initialization in that function and use
7258 * helper function so that radeon_device_init pretty much
7259 * do nothing more than calling asic specific function. This
7260 * should also allow to remove a bunch of callback function
7264 * cik_init - asic specific driver and hw init
7266 * @rdev: radeon_device pointer
7268 * Setup asic specific driver variables and program the hw
7269 * to a functional state (CIK).
7270 * Called at driver startup.
7271 * Returns 0 for success, errors for failure.
7273 int cik_init(struct radeon_device *rdev)
7275 struct radeon_ring *ring;
7279 if (!radeon_get_bios(rdev)) {
7280 if (ASIC_IS_AVIVO(rdev))
7283 /* Must be an ATOMBIOS */
7284 if (!rdev->is_atom_bios) {
/* fix: error text said "cayman" (copied from ni.c); this is the CIK path */
7285 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7288 r = radeon_atombios_init(rdev);
7292 /* Post card if necessary */
7293 if (!radeon_card_posted(rdev)) {
7295 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7298 DRM_INFO("GPU not posted. posting now...\n");
7299 atom_asic_init(rdev->mode_info.atom_context);
7301 /* init golden registers */
7302 cik_init_golden_registers(rdev);
7303 /* Initialize scratch registers */
7304 cik_scratch_init(rdev);
7305 /* Initialize surface registers */
7306 radeon_surface_init(rdev);
7307 /* Initialize clocks */
7308 radeon_get_clock_info(rdev->ddev);
7311 r = radeon_fence_driver_init(rdev);
7315 /* initialize memory controller */
7316 r = cik_mc_init(rdev);
7319 /* Memory manager */
7320 r = radeon_bo_init(rdev);
/* Size the software ring objects; the hardware is programmed later in
 * cik_startup(). Compute rings additionally reserve a doorbell page. */
7324 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7325 ring->ring_obj = NULL;
7326 r600_ring_init(rdev, ring, 1024 * 1024);
7328 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7329 ring->ring_obj = NULL;
7330 r600_ring_init(rdev, ring, 1024 * 1024);
7331 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7335 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7336 ring->ring_obj = NULL;
7337 r600_ring_init(rdev, ring, 1024 * 1024);
7338 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7342 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7343 ring->ring_obj = NULL;
7344 r600_ring_init(rdev, ring, 256 * 1024);
7346 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7347 ring->ring_obj = NULL;
7348 r600_ring_init(rdev, ring, 256 * 1024);
7350 r = radeon_uvd_init(rdev);
7352 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7353 ring->ring_obj = NULL;
7354 r600_ring_init(rdev, ring, 4096);
7357 rdev->ih.ring_obj = NULL;
7358 r600_ih_ring_init(rdev, 64 * 1024);
7360 r = r600_pcie_gart_init(rdev);
7364 rdev->accel_working = true;
7365 r = cik_startup(rdev);
/* On startup failure, unwind everything and run without acceleration. */
7367 dev_err(rdev->dev, "disabling GPU acceleration\n");
7369 cik_sdma_fini(rdev);
7371 sumo_rlc_fini(rdev);
7373 radeon_wb_fini(rdev);
7374 radeon_ib_pool_fini(rdev);
7375 radeon_vm_manager_fini(rdev);
7376 radeon_irq_kms_fini(rdev);
7377 cik_pcie_gart_fini(rdev);
7378 rdev->accel_working = false;
7381 /* Don't start up if the MC ucode is missing.
7382 * The default clocks and voltages before the MC ucode
7383 * is loaded are not sufficient for advanced operations.
7385 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
/* fix: message said "NI+" (copied from ni.c); this driver path is CIK */
7386 DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
7394 * cik_fini - asic specific driver and hw fini
7396 * @rdev: radeon_device pointer
7398 * Tear down the asic specific driver variables and program the hw
7399 * to an idle state (CIK).
7400 * Called at driver unload.
7402 void cik_fini(struct radeon_device *rdev)
7405 cik_sdma_fini(rdev);
7409 sumo_rlc_fini(rdev);
7411 radeon_wb_fini(rdev);
7412 radeon_vm_manager_fini(rdev);
7413 radeon_ib_pool_fini(rdev);
7414 radeon_irq_kms_fini(rdev);
7415 uvd_v1_0_fini(rdev);
7416 radeon_uvd_fini(rdev);
7417 cik_pcie_gart_fini(rdev);
7418 r600_vram_scratch_fini(rdev);
7419 radeon_gem_fini(rdev);
7420 radeon_fence_driver_fini(rdev);
7421 radeon_bo_fini(rdev);
7422 radeon_atombios_fini(rdev);
7427 /* display watermark setup */
7429 * dce8_line_buffer_adjust - Set up the line buffer
7431 * @rdev: radeon_device pointer
7432 * @radeon_crtc: the selected display controller
7433 * @mode: the current display mode on the selected display
7436 * Setup up the line buffer allocation for
7437 * the selected display controller (CIK).
7438 * Returns the line buffer size in pixels.
7440 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7441 struct radeon_crtc *radeon_crtc,
7442 struct drm_display_mode *mode)
7444 u32 tmp, buffer_alloc, i;
7445 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7448 * There are 6 line buffers, one for each display controllers.
7449 * There are 3 partitions per LB. Select the number of partitions
7450 * to enable based on the display width. For display widths larger
7451 * than 4096, you need use to use 2 display controllers and combine
7452 * them using the stereo blender.
7454 if (radeon_crtc->base.enabled && mode) {
7455 if (mode->crtc_hdisplay < 1920) {
7458 } else if (mode->crtc_hdisplay < 2560) {
7461 } else if (mode->crtc_hdisplay < 4096) {
7463 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7465 DRM_DEBUG_KMS("Mode too big for LB!\n");
7467 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7474 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7475 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7477 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7478 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7479 for (i = 0; i < rdev->usec_timeout; i++) {
7480 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7481 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7486 if (radeon_crtc->base.enabled && mode) {
7498 /* controller not enabled, so no lb used */
7503 * cik_get_number_of_dram_channels - get the number of dram channels
7505 * @rdev: radeon_device pointer
7507 * Look up the number of video ram channels (CIK).
7508 * Used for display watermark bandwidth calculations
7509 * Returns the number of dram channels
7511 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7513 u32 tmp = RREG32(MC_SHARED_CHMAP);
7515 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7538 struct dce8_wm_params {
7539 u32 dram_channels; /* number of dram channels */
7540 u32 yclk; /* bandwidth per dram data pin in kHz */
7541 u32 sclk; /* engine clock in kHz */
7542 u32 disp_clk; /* display clock in kHz */
7543 u32 src_width; /* viewport width */
7544 u32 active_time; /* active display time in ns */
7545 u32 blank_time; /* blank time in ns */
7546 bool interlaced; /* mode is interlaced */
7547 fixed20_12 vsc; /* vertical scale ratio */
7548 u32 num_heads; /* number of active crtcs */
7549 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7550 u32 lb_size; /* line buffer allocated to pipe */
7551 u32 vtaps; /* vertical scaler taps */
7555 * dce8_dram_bandwidth - get the dram bandwidth
7557 * @wm: watermark calculation data
7559 * Calculate the raw dram bandwidth (CIK).
7560 * Used for display watermark bandwidth calculations
7561 * Returns the dram bandwidth in MBytes/s
7563 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7565 /* Calculate raw DRAM Bandwidth */
7566 fixed20_12 dram_efficiency; /* 0.7 */
7567 fixed20_12 yclk, dram_channels, bandwidth;
7570 a.full = dfixed_const(1000);
7571 yclk.full = dfixed_const(wm->yclk);
7572 yclk.full = dfixed_div(yclk, a);
7573 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7574 a.full = dfixed_const(10);
7575 dram_efficiency.full = dfixed_const(7);
7576 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7577 bandwidth.full = dfixed_mul(dram_channels, yclk);
7578 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7580 return dfixed_trunc(bandwidth);
7584 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7586 * @wm: watermark calculation data
7588 * Calculate the dram bandwidth used for display (CIK).
7589 * Used for display watermark bandwidth calculations
7590 * Returns the dram bandwidth for display in MBytes/s
7592 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7594 /* Calculate DRAM Bandwidth and the part allocated to display. */
7595 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7596 fixed20_12 yclk, dram_channels, bandwidth;
7599 a.full = dfixed_const(1000);
7600 yclk.full = dfixed_const(wm->yclk);
7601 yclk.full = dfixed_div(yclk, a);
7602 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7603 a.full = dfixed_const(10);
7604 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7605 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7606 bandwidth.full = dfixed_mul(dram_channels, yclk);
7607 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7609 return dfixed_trunc(bandwidth);
7613 * dce8_data_return_bandwidth - get the data return bandwidth
7615 * @wm: watermark calculation data
7617 * Calculate the data return bandwidth used for display (CIK).
7618 * Used for display watermark bandwidth calculations
7619 * Returns the data return bandwidth in MBytes/s
7621 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7623 /* Calculate the display Data return Bandwidth */
7624 fixed20_12 return_efficiency; /* 0.8 */
7625 fixed20_12 sclk, bandwidth;
7628 a.full = dfixed_const(1000);
7629 sclk.full = dfixed_const(wm->sclk);
7630 sclk.full = dfixed_div(sclk, a);
7631 a.full = dfixed_const(10);
7632 return_efficiency.full = dfixed_const(8);
7633 return_efficiency.full = dfixed_div(return_efficiency, a);
7634 a.full = dfixed_const(32);
7635 bandwidth.full = dfixed_mul(a, sclk);
7636 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7638 return dfixed_trunc(bandwidth);
7642 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7644 * @wm: watermark calculation data
7646 * Calculate the dmif bandwidth used for display (CIK).
7647 * Used for display watermark bandwidth calculations
7648 * Returns the dmif bandwidth in MBytes/s
7650 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7652 /* Calculate the DMIF Request Bandwidth */
7653 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7654 fixed20_12 disp_clk, bandwidth;
7657 a.full = dfixed_const(1000);
7658 disp_clk.full = dfixed_const(wm->disp_clk);
7659 disp_clk.full = dfixed_div(disp_clk, a);
7660 a.full = dfixed_const(32);
7661 b.full = dfixed_mul(a, disp_clk);
7663 a.full = dfixed_const(10);
7664 disp_clk_request_efficiency.full = dfixed_const(8);
7665 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7667 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7669 return dfixed_trunc(bandwidth);
7673 * dce8_available_bandwidth - get the min available bandwidth
7675 * @wm: watermark calculation data
7677 * Calculate the min available bandwidth used for display (CIK).
7678 * Used for display watermark bandwidth calculations
7679 * Returns the min available bandwidth in MBytes/s
7681 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7683 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7684 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7685 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7686 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7688 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7692 * dce8_average_bandwidth - get the average available bandwidth
7694 * @wm: watermark calculation data
7696 * Calculate the average available bandwidth used for display (CIK).
7697 * Used for display watermark bandwidth calculations
7698 * Returns the average available bandwidth in MBytes/s
7700 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7702 /* Calculate the display mode Average Bandwidth
7703 * DisplayMode should contain the source and destination dimensions,
7707 fixed20_12 line_time;
7708 fixed20_12 src_width;
7709 fixed20_12 bandwidth;
7712 a.full = dfixed_const(1000);
7713 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7714 line_time.full = dfixed_div(line_time, a);
7715 bpp.full = dfixed_const(wm->bytes_per_pixel);
7716 src_width.full = dfixed_const(wm->src_width);
7717 bandwidth.full = dfixed_mul(src_width, bpp);
7718 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7719 bandwidth.full = dfixed_div(bandwidth, line_time);
7721 return dfixed_trunc(bandwidth);
7725 * dce8_latency_watermark - get the latency watermark
7727 * @wm: watermark calculation data
7729 * Calculate the latency watermark (CIK).
7730 * Used for display watermark bandwidth calculations
7731 * Returns the latency watermark in ns
7733 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7735 /* First calculate the latency in ns */
7736 u32 mc_latency = 2000; /* 2000 ns. */
7737 u32 available_bandwidth = dce8_available_bandwidth(wm);
7738 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7739 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7740 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7741 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7742 (wm->num_heads * cursor_line_pair_return_time);
7743 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7744 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7745 u32 tmp, dmif_size = 12288;
7748 if (wm->num_heads == 0)
7751 a.full = dfixed_const(2);
7752 b.full = dfixed_const(1);
7753 if ((wm->vsc.full > a.full) ||
7754 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7756 ((wm->vsc.full >= a.full) && wm->interlaced))
7757 max_src_lines_per_dst_line = 4;
7759 max_src_lines_per_dst_line = 2;
7761 a.full = dfixed_const(available_bandwidth);
7762 b.full = dfixed_const(wm->num_heads);
7763 a.full = dfixed_div(a, b);
7765 b.full = dfixed_const(mc_latency + 512);
7766 c.full = dfixed_const(wm->disp_clk);
7767 b.full = dfixed_div(b, c);
7769 c.full = dfixed_const(dmif_size);
7770 b.full = dfixed_div(c, b);
7772 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7774 b.full = dfixed_const(1000);
7775 c.full = dfixed_const(wm->disp_clk);
7776 b.full = dfixed_div(c, b);
7777 c.full = dfixed_const(wm->bytes_per_pixel);
7778 b.full = dfixed_mul(b, c);
7780 lb_fill_bw = min(tmp, dfixed_trunc(b));
7782 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7783 b.full = dfixed_const(1000);
7784 c.full = dfixed_const(lb_fill_bw);
7785 b.full = dfixed_div(c, b);
7786 a.full = dfixed_div(a, b);
7787 line_fill_time = dfixed_trunc(a);
7789 if (line_fill_time < wm->active_time)
7792 return latency + (line_fill_time - wm->active_time);
7797 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7798 * average and available dram bandwidth
7800 * @wm: watermark calculation data
7802 * Check if the display average bandwidth fits in the display
7803 * dram bandwidth (CIK).
7804 * Used for display watermark bandwidth calculations
7805 * Returns true if the display fits, false if not.
7807 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7809 if (dce8_average_bandwidth(wm) <=
7810 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7817 * dce8_average_bandwidth_vs_available_bandwidth - check
7818 * average and available bandwidth
7820 * @wm: watermark calculation data
7822 * Check if the display average bandwidth fits in the display
7823 * available bandwidth (CIK).
7824 * Used for display watermark bandwidth calculations
7825 * Returns true if the display fits, false if not.
7827 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7829 if (dce8_average_bandwidth(wm) <=
7830 (dce8_available_bandwidth(wm) / wm->num_heads))
7837 * dce8_check_latency_hiding - check latency hiding
7839 * @wm: watermark calculation data
7841 * Check latency hiding (CIK).
7842 * Used for display watermark bandwidth calculations
7843 * Returns true if the display fits, false if not.
7845 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7847 u32 lb_partitions = wm->lb_size / wm->src_width;
7848 u32 line_time = wm->active_time + wm->blank_time;
7849 u32 latency_tolerant_lines;
7853 a.full = dfixed_const(1);
7854 if (wm->vsc.full > a.full)
7855 latency_tolerant_lines = 1;
7857 if (lb_partitions <= (wm->vtaps + 1))
7858 latency_tolerant_lines = 1;
7860 latency_tolerant_lines = 2;
7863 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7865 if (dce8_latency_watermark(wm) <= latency_hiding)
7872 * dce8_program_watermarks - program display watermarks
7874 * @rdev: radeon_device pointer
7875 * @radeon_crtc: the selected display controller
7876 * @lb_size: line buffer size
7877 * @num_heads: number of display controllers in use
7879 * Calculate and program the display watermarks for the
7880 * selected display controller (CIK).
7882 static void dce8_program_watermarks(struct radeon_device *rdev,
7883 struct radeon_crtc *radeon_crtc,
7884 u32 lb_size, u32 num_heads)
7886 struct drm_display_mode *mode = &radeon_crtc->base.mode;
7887 struct dce8_wm_params wm_low, wm_high;
7890 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7893 if (radeon_crtc->base.enabled && num_heads && mode) {
7894 pixel_period = 1000000 / (u32)mode->clock;
7895 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7897 /* watermark for high clocks */
7898 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7899 rdev->pm.dpm_enabled) {
7901 radeon_dpm_get_mclk(rdev, false) * 10;
7903 radeon_dpm_get_sclk(rdev, false) * 10;
7905 wm_high.yclk = rdev->pm.current_mclk * 10;
7906 wm_high.sclk = rdev->pm.current_sclk * 10;
7909 wm_high.disp_clk = mode->clock;
7910 wm_high.src_width = mode->crtc_hdisplay;
7911 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7912 wm_high.blank_time = line_time - wm_high.active_time;
7913 wm_high.interlaced = false;
7914 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7915 wm_high.interlaced = true;
7916 wm_high.vsc = radeon_crtc->vsc;
7918 if (radeon_crtc->rmx_type != RMX_OFF)
7920 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7921 wm_high.lb_size = lb_size;
7922 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7923 wm_high.num_heads = num_heads;
7925 /* set for high clocks */
7926 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7928 /* possibly force display priority to high */
7929 /* should really do this at mode validation time... */
7930 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7931 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7932 !dce8_check_latency_hiding(&wm_high) ||
7933 (rdev->disp_priority == 2)) {
7934 DRM_DEBUG_KMS("force priority to high\n");
7937 /* watermark for low clocks */
7938 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7939 rdev->pm.dpm_enabled) {
7941 radeon_dpm_get_mclk(rdev, true) * 10;
7943 radeon_dpm_get_sclk(rdev, true) * 10;
7945 wm_low.yclk = rdev->pm.current_mclk * 10;
7946 wm_low.sclk = rdev->pm.current_sclk * 10;
7949 wm_low.disp_clk = mode->clock;
7950 wm_low.src_width = mode->crtc_hdisplay;
7951 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7952 wm_low.blank_time = line_time - wm_low.active_time;
7953 wm_low.interlaced = false;
7954 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7955 wm_low.interlaced = true;
7956 wm_low.vsc = radeon_crtc->vsc;
7958 if (radeon_crtc->rmx_type != RMX_OFF)
7960 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7961 wm_low.lb_size = lb_size;
7962 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7963 wm_low.num_heads = num_heads;
7965 /* set for low clocks */
7966 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7968 /* possibly force display priority to high */
7969 /* should really do this at mode validation time... */
7970 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7971 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7972 !dce8_check_latency_hiding(&wm_low) ||
7973 (rdev->disp_priority == 2)) {
7974 DRM_DEBUG_KMS("force priority to high\n");
7979 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7981 tmp &= ~LATENCY_WATERMARK_MASK(3);
7982 tmp |= LATENCY_WATERMARK_MASK(1);
7983 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7984 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7985 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7986 LATENCY_HIGH_WATERMARK(line_time)));
7988 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7989 tmp &= ~LATENCY_WATERMARK_MASK(3);
7990 tmp |= LATENCY_WATERMARK_MASK(2);
7991 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7992 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7993 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7994 LATENCY_HIGH_WATERMARK(line_time)));
7995 /* restore original selection */
7996 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7998 /* save values for DPM */
7999 radeon_crtc->line_time = line_time;
8000 radeon_crtc->wm_high = latency_watermark_a;
8001 radeon_crtc->wm_low = latency_watermark_b;
8005 * dce8_bandwidth_update - program display watermarks
8007 * @rdev: radeon_device pointer
8009 * Calculate and program the display watermarks and line
8010 * buffer allocation (CIK).
8012 void dce8_bandwidth_update(struct radeon_device *rdev)
8014 struct drm_display_mode *mode = NULL;
8015 u32 num_heads = 0, lb_size;
8018 radeon_update_display_priority(rdev);
8020 for (i = 0; i < rdev->num_crtc; i++) {
8021 if (rdev->mode_info.crtcs[i]->base.enabled)
8024 for (i = 0; i < rdev->num_crtc; i++) {
8025 mode = &rdev->mode_info.crtcs[i]->base.mode;
8026 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8027 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8032 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8034 * @rdev: radeon_device pointer
8036 * Fetches a GPU clock counter snapshot (SI).
8037 * Returns the 64 bit clock counter snapshot.
8039 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8043 mutex_lock(&rdev->gpu_clock_mutex);
8044 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8045 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8046 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8047 mutex_unlock(&rdev->gpu_clock_mutex);
8051 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8052 u32 cntl_reg, u32 status_reg)
8055 struct atom_clock_dividers dividers;
8058 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8059 clock, false, ÷rs);
8063 tmp = RREG32_SMC(cntl_reg);
8064 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8065 tmp |= dividers.post_divider;
8066 WREG32_SMC(cntl_reg, tmp);
8068 for (i = 0; i < 100; i++) {
8069 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8079 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8083 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8087 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8091 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8093 struct pci_dev *root = rdev->pdev->bus->self;
8094 int bridge_pos, gpu_pos;
8095 u32 speed_cntl, mask, current_data_rate;
8099 if (radeon_pcie_gen2 == 0)
8102 if (rdev->flags & RADEON_IS_IGP)
8105 if (!(rdev->flags & RADEON_IS_PCIE))
8108 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8112 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8115 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8116 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8117 LC_CURRENT_DATA_RATE_SHIFT;
8118 if (mask & DRM_PCIE_SPEED_80) {
8119 if (current_data_rate == 2) {
8120 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8123 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8124 } else if (mask & DRM_PCIE_SPEED_50) {
8125 if (current_data_rate == 1) {
8126 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8129 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8132 bridge_pos = pci_pcie_cap(root);
8136 gpu_pos = pci_pcie_cap(rdev->pdev);
8140 if (mask & DRM_PCIE_SPEED_80) {
8141 /* re-try equalization if gen3 is not already enabled */
8142 if (current_data_rate != 2) {
8143 u16 bridge_cfg, gpu_cfg;
8144 u16 bridge_cfg2, gpu_cfg2;
8145 u32 max_lw, current_lw, tmp;
8147 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8148 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8150 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8151 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8153 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8154 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8156 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8157 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8158 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8160 if (current_lw < max_lw) {
8161 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8162 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8163 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8164 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8165 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8166 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8170 for (i = 0; i < 10; i++) {
8172 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8173 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8176 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8177 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8179 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8180 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8182 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8183 tmp |= LC_SET_QUIESCE;
8184 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8186 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8188 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8193 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8194 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8195 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8196 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8198 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8199 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8200 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8201 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8204 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8205 tmp16 &= ~((1 << 4) | (7 << 9));
8206 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8207 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8209 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8210 tmp16 &= ~((1 << 4) | (7 << 9));
8211 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8212 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8214 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8215 tmp &= ~LC_SET_QUIESCE;
8216 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8221 /* set the link speed */
8222 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8223 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8224 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8226 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8228 if (mask & DRM_PCIE_SPEED_80)
8229 tmp16 |= 3; /* gen3 */
8230 else if (mask & DRM_PCIE_SPEED_50)
8231 tmp16 |= 2; /* gen2 */
8233 tmp16 |= 1; /* gen1 */
8234 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8236 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8237 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8238 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8240 for (i = 0; i < rdev->usec_timeout; i++) {
8241 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8242 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8248 static void cik_program_aspm(struct radeon_device *rdev)
8251 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8252 bool disable_clkreq = false;
8254 if (radeon_aspm == 0)
8257 /* XXX double check IGPs */
8258 if (rdev->flags & RADEON_IS_IGP)
8261 if (!(rdev->flags & RADEON_IS_PCIE))
8264 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8265 data &= ~LC_XMIT_N_FTS_MASK;
8266 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8268 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8270 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8271 data |= LC_GO_TO_RECOVERY;
8273 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8275 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8276 data |= P_IGNORE_EDB_ERR;
8278 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8280 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8281 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8282 data |= LC_PMI_TO_L1_DIS;
8284 data |= LC_L0S_INACTIVITY(7);
8287 data |= LC_L1_INACTIVITY(7);
8288 data &= ~LC_PMI_TO_L1_DIS;
8290 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8292 if (!disable_plloff_in_l1) {
8293 bool clk_req_support;
8295 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8296 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8297 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8299 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8301 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8302 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8303 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8305 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8307 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8308 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8309 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8311 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8313 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8314 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8315 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8317 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8319 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8320 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8321 data |= LC_DYN_LANES_PWR_STATE(3);
8323 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8325 if (!disable_clkreq) {
8326 struct pci_dev *root = rdev->pdev->bus->self;
8329 clk_req_support = false;
8330 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8331 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8332 clk_req_support = true;
8334 clk_req_support = false;
8337 if (clk_req_support) {
8338 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8339 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8341 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8343 orig = data = RREG32_SMC(THM_CLK_CNTL);
8344 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8345 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8347 WREG32_SMC(THM_CLK_CNTL, data);
8349 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8350 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8351 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8353 WREG32_SMC(MISC_CLK_CTRL, data);
8355 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8356 data &= ~BCLK_AS_XCLK;
8358 WREG32_SMC(CG_CLKPIN_CNTL, data);
8360 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8361 data &= ~FORCE_BIF_REFCLK_EN;
8363 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8365 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8366 data &= ~MPLL_CLKOUT_SEL_MASK;
8367 data |= MPLL_CLKOUT_SEL(4);
8369 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8374 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8377 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8378 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8380 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8383 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8384 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8385 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8386 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8387 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8388 data &= ~LC_L0S_INACTIVITY_MASK;
8390 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);