/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
35 #define CIK_PFP_UCODE_SIZE 2144
36 #define CIK_ME_UCODE_SIZE 2144
37 #define CIK_CE_UCODE_SIZE 2144
39 #define CIK_MEC_UCODE_SIZE 4192
41 #define BONAIRE_RLC_UCODE_SIZE 2048
42 #define KB_RLC_UCODE_SIZE 2560
43 #define KV_RLC_UCODE_SIZE 2560
45 #define CIK_MC_UCODE_SIZE 7866
47 #define CIK_SDMA_UCODE_SIZE 1050
48 #define CIK_SDMA_UCODE_VERSION 64
50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
64 MODULE_FIRMWARE("radeon/KABINI_me.bin");
65 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
66 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
71 extern void r600_ih_ring_fini(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
75 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
76 extern void si_rlc_fini(struct radeon_device *rdev);
77 extern int si_rlc_init(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
81 * Indirect registers accessor
83 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
87 WREG32(PCIE_INDEX, reg);
88 (void)RREG32(PCIE_INDEX);
89 r = RREG32(PCIE_DATA);
93 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95 WREG32(PCIE_INDEX, reg);
96 (void)RREG32(PCIE_INDEX);
98 (void)RREG32(PCIE_DATA);
101 static const u32 bonaire_golden_spm_registers[] =
103 0x30800, 0xe0ffffff, 0xe0000000
106 static const u32 bonaire_golden_common_registers[] =
108 0xc770, 0xffffffff, 0x00000800,
109 0xc774, 0xffffffff, 0x00000800,
110 0xc798, 0xffffffff, 0x00007fbf,
111 0xc79c, 0xffffffff, 0x00007faf
114 static const u32 bonaire_golden_registers[] =
116 0x3354, 0x00000333, 0x00000333,
117 0x3350, 0x000c0fc0, 0x00040200,
118 0x9a10, 0x00010000, 0x00058208,
119 0x3c000, 0xffff1fff, 0x00140000,
120 0x3c200, 0xfdfc0fff, 0x00000100,
121 0x3c234, 0x40000000, 0x40000200,
122 0x9830, 0xffffffff, 0x00000000,
123 0x9834, 0xf00fffff, 0x00000400,
124 0x9838, 0x0002021c, 0x00020200,
125 0xc78, 0x00000080, 0x00000000,
126 0x5bb0, 0x000000f0, 0x00000070,
127 0x5bc0, 0xf0311fff, 0x80300000,
128 0x98f8, 0x73773777, 0x12010001,
129 0x350c, 0x00810000, 0x408af000,
130 0x7030, 0x31000111, 0x00000011,
131 0x2f48, 0x73773777, 0x12010001,
132 0x220c, 0x00007fb6, 0x0021a1b1,
133 0x2210, 0x00007fb6, 0x002021b1,
134 0x2180, 0x00007fb6, 0x00002191,
135 0x2218, 0x00007fb6, 0x002121b1,
136 0x221c, 0x00007fb6, 0x002021b1,
137 0x21dc, 0x00007fb6, 0x00002191,
138 0x21e0, 0x00007fb6, 0x00002191,
139 0x3628, 0x0000003f, 0x0000000a,
140 0x362c, 0x0000003f, 0x0000000a,
141 0x2ae4, 0x00073ffe, 0x000022a2,
142 0x240c, 0x000007ff, 0x00000000,
143 0x8a14, 0xf000003f, 0x00000007,
144 0x8bf0, 0x00002001, 0x00000001,
145 0x8b24, 0xffffffff, 0x00ffffff,
146 0x30a04, 0x0000ff0f, 0x00000000,
147 0x28a4c, 0x07ffffff, 0x06000000,
148 0x4d8, 0x00000fff, 0x00000100,
149 0x3e78, 0x00000001, 0x00000002,
150 0x9100, 0x03000000, 0x0362c688,
151 0x8c00, 0x000000ff, 0x00000001,
152 0xe40, 0x00001fff, 0x00001fff,
153 0x9060, 0x0000007f, 0x00000020,
154 0x9508, 0x00010000, 0x00010000,
155 0xac14, 0x000003ff, 0x000000f3,
156 0xac0c, 0xffffffff, 0x00001032
159 static const u32 bonaire_mgcg_cgcg_init[] =
161 0xc420, 0xffffffff, 0xfffffffc,
162 0x30800, 0xffffffff, 0xe0000000,
163 0x3c2a0, 0xffffffff, 0x00000100,
164 0x3c208, 0xffffffff, 0x00000100,
165 0x3c2c0, 0xffffffff, 0xc0000100,
166 0x3c2c8, 0xffffffff, 0xc0000100,
167 0x3c2c4, 0xffffffff, 0xc0000100,
168 0x55e4, 0xffffffff, 0x00600100,
169 0x3c280, 0xffffffff, 0x00000100,
170 0x3c214, 0xffffffff, 0x06000100,
171 0x3c220, 0xffffffff, 0x00000100,
172 0x3c218, 0xffffffff, 0x06000100,
173 0x3c204, 0xffffffff, 0x00000100,
174 0x3c2e0, 0xffffffff, 0x00000100,
175 0x3c224, 0xffffffff, 0x00000100,
176 0x3c200, 0xffffffff, 0x00000100,
177 0x3c230, 0xffffffff, 0x00000100,
178 0x3c234, 0xffffffff, 0x00000100,
179 0x3c250, 0xffffffff, 0x00000100,
180 0x3c254, 0xffffffff, 0x00000100,
181 0x3c258, 0xffffffff, 0x00000100,
182 0x3c25c, 0xffffffff, 0x00000100,
183 0x3c260, 0xffffffff, 0x00000100,
184 0x3c27c, 0xffffffff, 0x00000100,
185 0x3c278, 0xffffffff, 0x00000100,
186 0x3c210, 0xffffffff, 0x06000100,
187 0x3c290, 0xffffffff, 0x00000100,
188 0x3c274, 0xffffffff, 0x00000100,
189 0x3c2b4, 0xffffffff, 0x00000100,
190 0x3c2b0, 0xffffffff, 0x00000100,
191 0x3c270, 0xffffffff, 0x00000100,
192 0x30800, 0xffffffff, 0xe0000000,
193 0x3c020, 0xffffffff, 0x00010000,
194 0x3c024, 0xffffffff, 0x00030002,
195 0x3c028, 0xffffffff, 0x00040007,
196 0x3c02c, 0xffffffff, 0x00060005,
197 0x3c030, 0xffffffff, 0x00090008,
198 0x3c034, 0xffffffff, 0x00010000,
199 0x3c038, 0xffffffff, 0x00030002,
200 0x3c03c, 0xffffffff, 0x00040007,
201 0x3c040, 0xffffffff, 0x00060005,
202 0x3c044, 0xffffffff, 0x00090008,
203 0x3c048, 0xffffffff, 0x00010000,
204 0x3c04c, 0xffffffff, 0x00030002,
205 0x3c050, 0xffffffff, 0x00040007,
206 0x3c054, 0xffffffff, 0x00060005,
207 0x3c058, 0xffffffff, 0x00090008,
208 0x3c05c, 0xffffffff, 0x00010000,
209 0x3c060, 0xffffffff, 0x00030002,
210 0x3c064, 0xffffffff, 0x00040007,
211 0x3c068, 0xffffffff, 0x00060005,
212 0x3c06c, 0xffffffff, 0x00090008,
213 0x3c070, 0xffffffff, 0x00010000,
214 0x3c074, 0xffffffff, 0x00030002,
215 0x3c078, 0xffffffff, 0x00040007,
216 0x3c07c, 0xffffffff, 0x00060005,
217 0x3c080, 0xffffffff, 0x00090008,
218 0x3c084, 0xffffffff, 0x00010000,
219 0x3c088, 0xffffffff, 0x00030002,
220 0x3c08c, 0xffffffff, 0x00040007,
221 0x3c090, 0xffffffff, 0x00060005,
222 0x3c094, 0xffffffff, 0x00090008,
223 0x3c098, 0xffffffff, 0x00010000,
224 0x3c09c, 0xffffffff, 0x00030002,
225 0x3c0a0, 0xffffffff, 0x00040007,
226 0x3c0a4, 0xffffffff, 0x00060005,
227 0x3c0a8, 0xffffffff, 0x00090008,
228 0x3c000, 0xffffffff, 0x96e00200,
229 0x8708, 0xffffffff, 0x00900100,
230 0xc424, 0xffffffff, 0x0020003f,
231 0x38, 0xffffffff, 0x0140001c,
232 0x3c, 0x000f0000, 0x000f0000,
233 0x220, 0xffffffff, 0xC060000C,
234 0x224, 0xc0000fff, 0x00000100,
235 0xf90, 0xffffffff, 0x00000100,
236 0xf98, 0x00000101, 0x00000000,
237 0x20a8, 0xffffffff, 0x00000104,
238 0x55e4, 0xff000fff, 0x00000100,
239 0x30cc, 0xc0000fff, 0x00000104,
240 0xc1e4, 0x00000001, 0x00000001,
241 0xd00c, 0xff000ff0, 0x00000100,
242 0xd80c, 0xff000ff0, 0x00000100
245 static const u32 spectre_golden_spm_registers[] =
247 0x30800, 0xe0ffffff, 0xe0000000
250 static const u32 spectre_golden_common_registers[] =
252 0xc770, 0xffffffff, 0x00000800,
253 0xc774, 0xffffffff, 0x00000800,
254 0xc798, 0xffffffff, 0x00007fbf,
255 0xc79c, 0xffffffff, 0x00007faf
258 static const u32 spectre_golden_registers[] =
260 0x3c000, 0xffff1fff, 0x96940200,
261 0x3c00c, 0xffff0001, 0xff000000,
262 0x3c200, 0xfffc0fff, 0x00000100,
263 0x6ed8, 0x00010101, 0x00010000,
264 0x9834, 0xf00fffff, 0x00000400,
265 0x9838, 0xfffffffc, 0x00020200,
266 0x5bb0, 0x000000f0, 0x00000070,
267 0x5bc0, 0xf0311fff, 0x80300000,
268 0x98f8, 0x73773777, 0x12010001,
269 0x9b7c, 0x00ff0000, 0x00fc0000,
270 0x2f48, 0x73773777, 0x12010001,
271 0x8a14, 0xf000003f, 0x00000007,
272 0x8b24, 0xffffffff, 0x00ffffff,
273 0x28350, 0x3f3f3fff, 0x00000082,
274 0x28355, 0x0000003f, 0x00000000,
275 0x3e78, 0x00000001, 0x00000002,
276 0x913c, 0xffff03df, 0x00000004,
277 0xc768, 0x00000008, 0x00000008,
278 0x8c00, 0x000008ff, 0x00000800,
279 0x9508, 0x00010000, 0x00010000,
280 0xac0c, 0xffffffff, 0x54763210,
281 0x214f8, 0x01ff01ff, 0x00000002,
282 0x21498, 0x007ff800, 0x00200000,
283 0x2015c, 0xffffffff, 0x00000f40,
284 0x30934, 0xffffffff, 0x00000001
287 static const u32 spectre_mgcg_cgcg_init[] =
289 0xc420, 0xffffffff, 0xfffffffc,
290 0x30800, 0xffffffff, 0xe0000000,
291 0x3c2a0, 0xffffffff, 0x00000100,
292 0x3c208, 0xffffffff, 0x00000100,
293 0x3c2c0, 0xffffffff, 0x00000100,
294 0x3c2c8, 0xffffffff, 0x00000100,
295 0x3c2c4, 0xffffffff, 0x00000100,
296 0x55e4, 0xffffffff, 0x00600100,
297 0x3c280, 0xffffffff, 0x00000100,
298 0x3c214, 0xffffffff, 0x06000100,
299 0x3c220, 0xffffffff, 0x00000100,
300 0x3c218, 0xffffffff, 0x06000100,
301 0x3c204, 0xffffffff, 0x00000100,
302 0x3c2e0, 0xffffffff, 0x00000100,
303 0x3c224, 0xffffffff, 0x00000100,
304 0x3c200, 0xffffffff, 0x00000100,
305 0x3c230, 0xffffffff, 0x00000100,
306 0x3c234, 0xffffffff, 0x00000100,
307 0x3c250, 0xffffffff, 0x00000100,
308 0x3c254, 0xffffffff, 0x00000100,
309 0x3c258, 0xffffffff, 0x00000100,
310 0x3c25c, 0xffffffff, 0x00000100,
311 0x3c260, 0xffffffff, 0x00000100,
312 0x3c27c, 0xffffffff, 0x00000100,
313 0x3c278, 0xffffffff, 0x00000100,
314 0x3c210, 0xffffffff, 0x06000100,
315 0x3c290, 0xffffffff, 0x00000100,
316 0x3c274, 0xffffffff, 0x00000100,
317 0x3c2b4, 0xffffffff, 0x00000100,
318 0x3c2b0, 0xffffffff, 0x00000100,
319 0x3c270, 0xffffffff, 0x00000100,
320 0x30800, 0xffffffff, 0xe0000000,
321 0x3c020, 0xffffffff, 0x00010000,
322 0x3c024, 0xffffffff, 0x00030002,
323 0x3c028, 0xffffffff, 0x00040007,
324 0x3c02c, 0xffffffff, 0x00060005,
325 0x3c030, 0xffffffff, 0x00090008,
326 0x3c034, 0xffffffff, 0x00010000,
327 0x3c038, 0xffffffff, 0x00030002,
328 0x3c03c, 0xffffffff, 0x00040007,
329 0x3c040, 0xffffffff, 0x00060005,
330 0x3c044, 0xffffffff, 0x00090008,
331 0x3c048, 0xffffffff, 0x00010000,
332 0x3c04c, 0xffffffff, 0x00030002,
333 0x3c050, 0xffffffff, 0x00040007,
334 0x3c054, 0xffffffff, 0x00060005,
335 0x3c058, 0xffffffff, 0x00090008,
336 0x3c05c, 0xffffffff, 0x00010000,
337 0x3c060, 0xffffffff, 0x00030002,
338 0x3c064, 0xffffffff, 0x00040007,
339 0x3c068, 0xffffffff, 0x00060005,
340 0x3c06c, 0xffffffff, 0x00090008,
341 0x3c070, 0xffffffff, 0x00010000,
342 0x3c074, 0xffffffff, 0x00030002,
343 0x3c078, 0xffffffff, 0x00040007,
344 0x3c07c, 0xffffffff, 0x00060005,
345 0x3c080, 0xffffffff, 0x00090008,
346 0x3c084, 0xffffffff, 0x00010000,
347 0x3c088, 0xffffffff, 0x00030002,
348 0x3c08c, 0xffffffff, 0x00040007,
349 0x3c090, 0xffffffff, 0x00060005,
350 0x3c094, 0xffffffff, 0x00090008,
351 0x3c098, 0xffffffff, 0x00010000,
352 0x3c09c, 0xffffffff, 0x00030002,
353 0x3c0a0, 0xffffffff, 0x00040007,
354 0x3c0a4, 0xffffffff, 0x00060005,
355 0x3c0a8, 0xffffffff, 0x00090008,
356 0x3c0ac, 0xffffffff, 0x00010000,
357 0x3c0b0, 0xffffffff, 0x00030002,
358 0x3c0b4, 0xffffffff, 0x00040007,
359 0x3c0b8, 0xffffffff, 0x00060005,
360 0x3c0bc, 0xffffffff, 0x00090008,
361 0x3c000, 0xffffffff, 0x96e00200,
362 0x8708, 0xffffffff, 0x00900100,
363 0xc424, 0xffffffff, 0x0020003f,
364 0x38, 0xffffffff, 0x0140001c,
365 0x3c, 0x000f0000, 0x000f0000,
366 0x220, 0xffffffff, 0xC060000C,
367 0x224, 0xc0000fff, 0x00000100,
368 0xf90, 0xffffffff, 0x00000100,
369 0xf98, 0x00000101, 0x00000000,
370 0x20a8, 0xffffffff, 0x00000104,
371 0x55e4, 0xff000fff, 0x00000100,
372 0x30cc, 0xc0000fff, 0x00000104,
373 0xc1e4, 0x00000001, 0x00000001,
374 0xd00c, 0xff000ff0, 0x00000100,
375 0xd80c, 0xff000ff0, 0x00000100
378 static const u32 kalindi_golden_spm_registers[] =
380 0x30800, 0xe0ffffff, 0xe0000000
383 static const u32 kalindi_golden_common_registers[] =
385 0xc770, 0xffffffff, 0x00000800,
386 0xc774, 0xffffffff, 0x00000800,
387 0xc798, 0xffffffff, 0x00007fbf,
388 0xc79c, 0xffffffff, 0x00007faf
391 static const u32 kalindi_golden_registers[] =
393 0x3c000, 0xffffdfff, 0x6e944040,
394 0x55e4, 0xff607fff, 0xfc000100,
395 0x3c220, 0xff000fff, 0x00000100,
396 0x3c224, 0xff000fff, 0x00000100,
397 0x3c200, 0xfffc0fff, 0x00000100,
398 0x6ed8, 0x00010101, 0x00010000,
399 0x9830, 0xffffffff, 0x00000000,
400 0x9834, 0xf00fffff, 0x00000400,
401 0x5bb0, 0x000000f0, 0x00000070,
402 0x5bc0, 0xf0311fff, 0x80300000,
403 0x98f8, 0x73773777, 0x12010001,
404 0x98fc, 0xffffffff, 0x00000010,
405 0x9b7c, 0x00ff0000, 0x00fc0000,
406 0x8030, 0x00001f0f, 0x0000100a,
407 0x2f48, 0x73773777, 0x12010001,
408 0x2408, 0x000fffff, 0x000c007f,
409 0x8a14, 0xf000003f, 0x00000007,
410 0x8b24, 0x3fff3fff, 0x00ffcfff,
411 0x30a04, 0x0000ff0f, 0x00000000,
412 0x28a4c, 0x07ffffff, 0x06000000,
413 0x4d8, 0x00000fff, 0x00000100,
414 0x3e78, 0x00000001, 0x00000002,
415 0xc768, 0x00000008, 0x00000008,
416 0x8c00, 0x000000ff, 0x00000003,
417 0x214f8, 0x01ff01ff, 0x00000002,
418 0x21498, 0x007ff800, 0x00200000,
419 0x2015c, 0xffffffff, 0x00000f40,
420 0x88c4, 0x001f3ae3, 0x00000082,
421 0x88d4, 0x0000001f, 0x00000010,
422 0x30934, 0xffffffff, 0x00000000
425 static const u32 kalindi_mgcg_cgcg_init[] =
427 0xc420, 0xffffffff, 0xfffffffc,
428 0x30800, 0xffffffff, 0xe0000000,
429 0x3c2a0, 0xffffffff, 0x00000100,
430 0x3c208, 0xffffffff, 0x00000100,
431 0x3c2c0, 0xffffffff, 0x00000100,
432 0x3c2c8, 0xffffffff, 0x00000100,
433 0x3c2c4, 0xffffffff, 0x00000100,
434 0x55e4, 0xffffffff, 0x00600100,
435 0x3c280, 0xffffffff, 0x00000100,
436 0x3c214, 0xffffffff, 0x06000100,
437 0x3c220, 0xffffffff, 0x00000100,
438 0x3c218, 0xffffffff, 0x06000100,
439 0x3c204, 0xffffffff, 0x00000100,
440 0x3c2e0, 0xffffffff, 0x00000100,
441 0x3c224, 0xffffffff, 0x00000100,
442 0x3c200, 0xffffffff, 0x00000100,
443 0x3c230, 0xffffffff, 0x00000100,
444 0x3c234, 0xffffffff, 0x00000100,
445 0x3c250, 0xffffffff, 0x00000100,
446 0x3c254, 0xffffffff, 0x00000100,
447 0x3c258, 0xffffffff, 0x00000100,
448 0x3c25c, 0xffffffff, 0x00000100,
449 0x3c260, 0xffffffff, 0x00000100,
450 0x3c27c, 0xffffffff, 0x00000100,
451 0x3c278, 0xffffffff, 0x00000100,
452 0x3c210, 0xffffffff, 0x06000100,
453 0x3c290, 0xffffffff, 0x00000100,
454 0x3c274, 0xffffffff, 0x00000100,
455 0x3c2b4, 0xffffffff, 0x00000100,
456 0x3c2b0, 0xffffffff, 0x00000100,
457 0x3c270, 0xffffffff, 0x00000100,
458 0x30800, 0xffffffff, 0xe0000000,
459 0x3c020, 0xffffffff, 0x00010000,
460 0x3c024, 0xffffffff, 0x00030002,
461 0x3c028, 0xffffffff, 0x00040007,
462 0x3c02c, 0xffffffff, 0x00060005,
463 0x3c030, 0xffffffff, 0x00090008,
464 0x3c034, 0xffffffff, 0x00010000,
465 0x3c038, 0xffffffff, 0x00030002,
466 0x3c03c, 0xffffffff, 0x00040007,
467 0x3c040, 0xffffffff, 0x00060005,
468 0x3c044, 0xffffffff, 0x00090008,
469 0x3c000, 0xffffffff, 0x96e00200,
470 0x8708, 0xffffffff, 0x00900100,
471 0xc424, 0xffffffff, 0x0020003f,
472 0x38, 0xffffffff, 0x0140001c,
473 0x3c, 0x000f0000, 0x000f0000,
474 0x220, 0xffffffff, 0xC060000C,
475 0x224, 0xc0000fff, 0x00000100,
476 0x20a8, 0xffffffff, 0x00000104,
477 0x55e4, 0xff000fff, 0x00000100,
478 0x30cc, 0xc0000fff, 0x00000104,
479 0xc1e4, 0x00000001, 0x00000001,
480 0xd00c, 0xff000ff0, 0x00000100,
481 0xd80c, 0xff000ff0, 0x00000100
484 static void cik_init_golden_registers(struct radeon_device *rdev)
486 switch (rdev->family) {
488 radeon_program_register_sequence(rdev,
489 bonaire_mgcg_cgcg_init,
490 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
491 radeon_program_register_sequence(rdev,
492 bonaire_golden_registers,
493 (const u32)ARRAY_SIZE(bonaire_golden_registers));
494 radeon_program_register_sequence(rdev,
495 bonaire_golden_common_registers,
496 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
497 radeon_program_register_sequence(rdev,
498 bonaire_golden_spm_registers,
499 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
502 radeon_program_register_sequence(rdev,
503 kalindi_mgcg_cgcg_init,
504 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
505 radeon_program_register_sequence(rdev,
506 kalindi_golden_registers,
507 (const u32)ARRAY_SIZE(kalindi_golden_registers));
508 radeon_program_register_sequence(rdev,
509 kalindi_golden_common_registers,
510 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
511 radeon_program_register_sequence(rdev,
512 kalindi_golden_spm_registers,
513 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
516 radeon_program_register_sequence(rdev,
517 spectre_mgcg_cgcg_init,
518 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
519 radeon_program_register_sequence(rdev,
520 spectre_golden_registers,
521 (const u32)ARRAY_SIZE(spectre_golden_registers));
522 radeon_program_register_sequence(rdev,
523 spectre_golden_common_registers,
524 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
525 radeon_program_register_sequence(rdev,
526 spectre_golden_spm_registers,
527 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
535 * cik_get_xclk - get the xclk
537 * @rdev: radeon_device pointer
539 * Returns the reference clock used by the gfx engine
542 u32 cik_get_xclk(struct radeon_device *rdev)
544 u32 reference_clock = rdev->clock.spll.reference_freq;
546 if (rdev->flags & RADEON_IS_IGP) {
547 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
548 return reference_clock / 2;
550 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
551 return reference_clock / 4;
553 return reference_clock;
557 * cik_mm_rdoorbell - read a doorbell dword
559 * @rdev: radeon_device pointer
560 * @offset: byte offset into the aperture
562 * Returns the value in the doorbell aperture at the
563 * requested offset (CIK).
565 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
567 if (offset < rdev->doorbell.size) {
568 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
570 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
576 * cik_mm_wdoorbell - write a doorbell dword
578 * @rdev: radeon_device pointer
579 * @offset: byte offset into the aperture
582 * Writes @v to the doorbell aperture at the
583 * requested offset (CIK).
585 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
587 if (offset < rdev->doorbell.size) {
588 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
590 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
594 #define BONAIRE_IO_MC_REGS_SIZE 36
596 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
598 {0x00000070, 0x04400000},
599 {0x00000071, 0x80c01803},
600 {0x00000072, 0x00004004},
601 {0x00000073, 0x00000100},
602 {0x00000074, 0x00ff0000},
603 {0x00000075, 0x34000000},
604 {0x00000076, 0x08000014},
605 {0x00000077, 0x00cc08ec},
606 {0x00000078, 0x00000400},
607 {0x00000079, 0x00000000},
608 {0x0000007a, 0x04090000},
609 {0x0000007c, 0x00000000},
610 {0x0000007e, 0x4408a8e8},
611 {0x0000007f, 0x00000304},
612 {0x00000080, 0x00000000},
613 {0x00000082, 0x00000001},
614 {0x00000083, 0x00000002},
615 {0x00000084, 0xf3e4f400},
616 {0x00000085, 0x052024e3},
617 {0x00000087, 0x00000000},
618 {0x00000088, 0x01000000},
619 {0x0000008a, 0x1c0a0000},
620 {0x0000008b, 0xff010000},
621 {0x0000008d, 0xffffefff},
622 {0x0000008e, 0xfff3efff},
623 {0x0000008f, 0xfff3efbf},
624 {0x00000092, 0xf7ffffff},
625 {0x00000093, 0xffffff7f},
626 {0x00000095, 0x00101101},
627 {0x00000096, 0x00000fff},
628 {0x00000097, 0x00116fff},
629 {0x00000098, 0x60010000},
630 {0x00000099, 0x10010000},
631 {0x0000009a, 0x00006000},
632 {0x0000009b, 0x00001000},
633 {0x0000009f, 0x00b48000}
637 * cik_srbm_select - select specific register instances
639 * @rdev: radeon_device pointer
640 * @me: selected ME (micro engine)
645 * Switches the currently active registers instances. Some
646 * registers are instanced per VMID, others are instanced per
647 * me/pipe/queue combination.
649 static void cik_srbm_select(struct radeon_device *rdev,
650 u32 me, u32 pipe, u32 queue, u32 vmid)
652 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
655 QUEUEID(queue & 0x7));
656 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
	/* NOTE(review): this copy of the file has lost several structural
	 * lines (opening brace, the io_mc_regs declaration, a NULL check on
	 * rdev->mc_fw, case labels/breaks, loop-exit breaks, inter-poll
	 * delays and the final return) -- verify against upstream cik.c
	 * before building. */
	const __be32 *fw_data;
	u32 running, blackout = 0;
	int i, ucode_size, regs_size;
	/* pick the io register table and ucode size for this asic */
	switch (rdev->family) {
	io_mc_regs = (u32 *)&bonaire_io_mc_regs;
	ucode_size = CIK_MC_UCODE_SIZE;
	regs_size = BONAIRE_IO_MC_REGS_SIZE;
	/* check whether the MC sequencer is already running */
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
	/* engage MC blackout while reprogramming; restored at the end */
	blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
	WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
	/* reset the engine and set to writable */
	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
	/* load mc io regs */
	for (i = 0; i < regs_size; i++) {
		WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
		WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
	/* load the MC ucode */
	fw_data = (const __be32 *)rdev->mc_fw->data;
	for (i = 0; i < ucode_size; i++)
		WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
	/* put the engine back into the active state */
	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
	/* wait for training to complete */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
	/* release MC blackout */
	WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
	/* NOTE(review): this copy of the file has lost structural lines
	 * (opening brace, the sdma_req_size/fw_name/err declarations, case
	 * labels and breaks, the per-request "if (err) goto out" checks,
	 * the -EINVAL assignments after size mismatches, and the out:
	 * error-unwind label/return) -- verify against upstream cik.c. */
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
	/* per-family expected firmware image sizes (dwords * 4) */
	switch (rdev->family) {
	chip_name = "BONAIRE";
	pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
	me_req_size = CIK_ME_UCODE_SIZE * 4;
	ce_req_size = CIK_CE_UCODE_SIZE * 4;
	mec_req_size = CIK_MEC_UCODE_SIZE * 4;
	rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
	mc_req_size = CIK_MC_UCODE_SIZE * 4;
	sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
	chip_name = "KAVERI";
	pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
	me_req_size = CIK_ME_UCODE_SIZE * 4;
	ce_req_size = CIK_CE_UCODE_SIZE * 4;
	mec_req_size = CIK_MEC_UCODE_SIZE * 4;
	rlc_req_size = KV_RLC_UCODE_SIZE * 4;
	sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
	chip_name = "KABINI";
	pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
	me_req_size = CIK_ME_UCODE_SIZE * 4;
	ce_req_size = CIK_CE_UCODE_SIZE * 4;
	mec_req_size = CIK_MEC_UCODE_SIZE * 4;
	rlc_req_size = KB_RLC_UCODE_SIZE * 4;
	sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
	DRM_INFO("Loading %s Microcode\n", chip_name);
	/* fetch each image via request_firmware() and sanity-check its size */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (rdev->pfp_fw->size != pfp_req_size) {
		"cik_cp: Bogus length %zu in firmware \"%s\"\n",
		rdev->pfp_fw->size, fw_name);
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (rdev->me_fw->size != me_req_size) {
		"cik_cp: Bogus length %zu in firmware \"%s\"\n",
		rdev->me_fw->size, fw_name);
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (rdev->ce_fw->size != ce_req_size) {
		"cik_cp: Bogus length %zu in firmware \"%s\"\n",
		rdev->ce_fw->size, fw_name);
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (rdev->mec_fw->size != mec_req_size) {
		"cik_cp: Bogus length %zu in firmware \"%s\"\n",
		rdev->mec_fw->size, fw_name);
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (rdev->rlc_fw->size != rlc_req_size) {
		"cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		rdev->rlc_fw->size, fw_name);
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (rdev->sdma_fw->size != sdma_req_size) {
		"cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		rdev->sdma_fw->size, fw_name);
	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (rdev->mc_fw->size != mc_req_size) {
			"cik_mc: Bogus length %zu in firmware \"%s\"\n",
			rdev->mc_fw->size, fw_name);
	/* error unwind: drop every firmware reference taken above */
	"cik_cp: Failed to load firmware \"%s\"\n",
	release_firmware(rdev->pfp_fw);
	release_firmware(rdev->me_fw);
	release_firmware(rdev->ce_fw);
	release_firmware(rdev->rlc_fw);
	release_firmware(rdev->mc_fw);
892 * cik_tiling_mode_table_init - init the hw tiling table
894 * @rdev: radeon_device pointer
896 * Starting with SI, the tiling setup is done globally in a
897 * set of 32 tiling modes. Rather than selecting each set of
898 * parameters per surface as on older asics, we just select
899 * which index in the tiling table we want to use, and the
900 * surface uses those parameters (CIK).
902 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
904 const u32 num_tile_mode_states = 32;
905 const u32 num_secondary_tile_mode_states = 16;
906 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
907 u32 num_pipe_configs;
908 u32 num_rbs = rdev->config.cik.max_backends_per_se *
909 rdev->config.cik.max_shader_engines;
911 switch (rdev->config.cik.mem_row_size_in_kb) {
913 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
917 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
920 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
924 num_pipe_configs = rdev->config.cik.max_tile_pipes;
925 if (num_pipe_configs > 8)
926 num_pipe_configs = 8; /* ??? */
928 if (num_pipe_configs == 8) {
929 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
930 switch (reg_offset) {
932 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
952 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
953 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
956 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959 TILE_SPLIT(split_equal_to_row_size));
962 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
963 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
966 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
967 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
972 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 TILE_SPLIT(split_equal_to_row_size));
978 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
982 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
986 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
987 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
992 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
993 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
994 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
998 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1004 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1005 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1008 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1009 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1016 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1020 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1021 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1026 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1027 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1030 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1036 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1037 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1038 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1042 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1051 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1052 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1054 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1055 switch (reg_offset) {
1057 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1060 NUM_BANKS(ADDR_SURF_16_BANK));
1063 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 NUM_BANKS(ADDR_SURF_16_BANK));
1069 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1072 NUM_BANKS(ADDR_SURF_16_BANK));
1075 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1078 NUM_BANKS(ADDR_SURF_16_BANK));
1081 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 NUM_BANKS(ADDR_SURF_8_BANK));
1087 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1090 NUM_BANKS(ADDR_SURF_4_BANK));
1093 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1096 NUM_BANKS(ADDR_SURF_2_BANK));
1099 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1101 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1102 NUM_BANKS(ADDR_SURF_16_BANK));
1105 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108 NUM_BANKS(ADDR_SURF_16_BANK));
1111 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1114 NUM_BANKS(ADDR_SURF_16_BANK));
1117 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1120 NUM_BANKS(ADDR_SURF_16_BANK));
1123 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 NUM_BANKS(ADDR_SURF_8_BANK));
1129 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1132 NUM_BANKS(ADDR_SURF_4_BANK));
1135 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1138 NUM_BANKS(ADDR_SURF_2_BANK));
1144 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1146 } else if (num_pipe_configs == 4) {
1148 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1149 switch (reg_offset) {
1151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1157 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1163 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1169 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1171 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1175 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178 TILE_SPLIT(split_equal_to_row_size));
1181 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1191 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 TILE_SPLIT(split_equal_to_row_size));
1197 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1201 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1205 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1206 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1211 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1213 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1217 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1218 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1219 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1223 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1227 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1233 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1239 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1245 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1249 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1255 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1261 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1270 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1273 } else if (num_rbs < 4) {
1274 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275 switch (reg_offset) {
1277 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1283 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1289 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1295 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1301 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304 TILE_SPLIT(split_equal_to_row_size));
1307 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1311 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1317 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 TILE_SPLIT(split_equal_to_row_size));
1323 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1327 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1331 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1337 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1343 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1349 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1353 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1359 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1365 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1371 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1381 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1387 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1396 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 switch (reg_offset) {
1403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 NUM_BANKS(ADDR_SURF_16_BANK));
1409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 NUM_BANKS(ADDR_SURF_16_BANK));
1415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 NUM_BANKS(ADDR_SURF_16_BANK));
1421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 NUM_BANKS(ADDR_SURF_16_BANK));
1427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 NUM_BANKS(ADDR_SURF_16_BANK));
1433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436 NUM_BANKS(ADDR_SURF_8_BANK));
1439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 NUM_BANKS(ADDR_SURF_4_BANK));
1445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 NUM_BANKS(ADDR_SURF_16_BANK));
1451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 NUM_BANKS(ADDR_SURF_16_BANK));
1457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460 NUM_BANKS(ADDR_SURF_16_BANK));
1463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466 NUM_BANKS(ADDR_SURF_16_BANK));
1469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472 NUM_BANKS(ADDR_SURF_16_BANK));
1475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478 NUM_BANKS(ADDR_SURF_8_BANK));
1481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 NUM_BANKS(ADDR_SURF_4_BANK));
1490 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1492 } else if (num_pipe_configs == 2) {
1493 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494 switch (reg_offset) {
1496 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 PIPE_CONFIG(ADDR_SURF_P2) |
1499 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 PIPE_CONFIG(ADDR_SURF_P2) |
1505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1508 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 PIPE_CONFIG(ADDR_SURF_P2) |
1511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1514 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516 PIPE_CONFIG(ADDR_SURF_P2) |
1517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1520 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522 PIPE_CONFIG(ADDR_SURF_P2) |
1523 TILE_SPLIT(split_equal_to_row_size));
1526 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1530 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532 PIPE_CONFIG(ADDR_SURF_P2) |
1533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1536 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538 PIPE_CONFIG(ADDR_SURF_P2) |
1539 TILE_SPLIT(split_equal_to_row_size));
1542 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1545 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1549 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 PIPE_CONFIG(ADDR_SURF_P2) |
1552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1555 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 PIPE_CONFIG(ADDR_SURF_P2) |
1558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1561 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 PIPE_CONFIG(ADDR_SURF_P2) |
1564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1567 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1571 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 PIPE_CONFIG(ADDR_SURF_P2) |
1574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1577 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579 PIPE_CONFIG(ADDR_SURF_P2) |
1580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1583 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585 PIPE_CONFIG(ADDR_SURF_P2) |
1586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1589 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1593 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 PIPE_CONFIG(ADDR_SURF_P2) |
1596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1599 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601 PIPE_CONFIG(ADDR_SURF_P2) |
1602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1605 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607 PIPE_CONFIG(ADDR_SURF_P2) |
1608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1614 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1617 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618 switch (reg_offset) {
1620 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 NUM_BANKS(ADDR_SURF_16_BANK));
1626 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 NUM_BANKS(ADDR_SURF_16_BANK));
1632 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 NUM_BANKS(ADDR_SURF_16_BANK));
1638 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 NUM_BANKS(ADDR_SURF_16_BANK));
1644 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647 NUM_BANKS(ADDR_SURF_16_BANK));
1650 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 NUM_BANKS(ADDR_SURF_16_BANK));
1656 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659 NUM_BANKS(ADDR_SURF_8_BANK));
1662 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 NUM_BANKS(ADDR_SURF_16_BANK));
1668 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 NUM_BANKS(ADDR_SURF_16_BANK));
1674 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 NUM_BANKS(ADDR_SURF_16_BANK));
1680 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 NUM_BANKS(ADDR_SURF_16_BANK));
1686 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689 NUM_BANKS(ADDR_SURF_16_BANK));
1692 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 NUM_BANKS(ADDR_SURF_16_BANK));
1698 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701 NUM_BANKS(ADDR_SURF_8_BANK));
1707 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1710 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1714 * cik_select_se_sh - select which SE, SH to address
1716 * @rdev: radeon_device pointer
1717 * @se_num: shader engine to address
1718 * @sh_num: sh block to address
1720 * Select which SE, SH combinations to address. Certain
1721 * registers are instanced per SE or SH. 0xffffffff means
1722 * broadcast to all SEs or SHs (CIK).
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725 u32 se_num, u32 sh_num)
1727 u32 data = INSTANCE_BROADCAST_WRITES;
1729 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731 else if (se_num == 0xffffffff)
1732 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733 else if (sh_num == 0xffffffff)
1734 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1736 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737 WREG32(GRBM_GFX_INDEX, data);
1741 * cik_create_bitmask - create a bitmask
1743 * @bit_width: length of the mask
1745 * create a variable length bit mask (CIK).
1746 * Returns the bitmask.
1748 static u32 cik_create_bitmask(u32 bit_width)
1752 for (i = 0; i < bit_width; i++) {
1760 * cik_select_se_sh - select which SE, SH to address
1762 * @rdev: radeon_device pointer
1763 * @max_rb_num: max RBs (render backends) for the asic
1764 * @se_num: number of SEs (shader engines) for the asic
1765 * @sh_per_se: number of SH blocks per SE for the asic
1767 * Calculates the bitmask of disabled RBs (CIK).
1768 * Returns the disabled RB bitmask.
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771 u32 max_rb_num, u32 se_num,
1776 data = RREG32(CC_RB_BACKEND_DISABLE);
1778 data &= BACKEND_DISABLE_MASK;
1781 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1783 data >>= BACKEND_DISABLE_SHIFT;
1785 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1791 * cik_setup_rb - setup the RBs on the asic
1793 * @rdev: radeon_device pointer
1794 * @se_num: number of SEs (shader engines) for the asic
1795 * @sh_per_se: number of SH blocks per SE for the asic
1796 * @max_rb_num: max RBs (render backends) for the asic
1798 * Configures per-SE/SH RB registers (CIK).
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801 u32 se_num, u32 sh_per_se,
1806 u32 disabled_rbs = 0;
1807 u32 enabled_rbs = 0;
1809 for (i = 0; i < se_num; i++) {
1810 for (j = 0; j < sh_per_se; j++) {
1811 cik_select_se_sh(rdev, i, j);
1812 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1816 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1819 for (i = 0; i < max_rb_num; i++) {
1820 if (!(disabled_rbs & mask))
1821 enabled_rbs |= mask;
1825 for (i = 0; i < se_num; i++) {
1826 cik_select_se_sh(rdev, i, 0xffffffff);
1828 for (j = 0; j < sh_per_se; j++) {
1829 switch (enabled_rbs & 3) {
1831 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1834 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1838 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1843 WREG32(PA_SC_RASTER_CONFIG, data);
1845 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1849 * cik_gpu_init - setup the 3D engine
1851 * @rdev: radeon_device pointer
1853 * Configures the 3D engine and tiling configuration
1854 * registers so that the 3D engine is usable.
/*
 * cik_gpu_init - configure the 3D engine and tiling config registers (CIK).
 * NOTE(review): this block is a mangled paste — every line carries a stray
 * leading line number and the switch case labels / braces were dropped
 * (including an entire unseen family case between original lines 1881-1886).
 * Code is left byte-identical; comments below describe only the visible
 * statements.  Reconstruct from the upstream file before compiling.
 */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1858 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859 u32 mc_shared_chmap, mc_arb_ramcfg;
1860 u32 hdp_host_path_cntl;
/* Per-family limits; case labels lost.  First group (2 SEs, 4 pipes, 7
 * CUs/SH) is presumably Bonaire given BONAIRE_GB_ADDR_CONFIG_GOLDEN — TODO
 * confirm.  Second group (1 SE, 2 pipes, 2 CUs/SH) looks like a small APU. */
1864 switch (rdev->family) {
1866 rdev->config.cik.max_shader_engines = 2;
1867 rdev->config.cik.max_tile_pipes = 4;
1868 rdev->config.cik.max_cu_per_sh = 7;
1869 rdev->config.cik.max_sh_per_se = 1;
1870 rdev->config.cik.max_backends_per_se = 2;
1871 rdev->config.cik.max_texture_channel_caches = 4;
1872 rdev->config.cik.max_gprs = 256;
1873 rdev->config.cik.max_gs_threads = 32;
1874 rdev->config.cik.max_hw_contexts = 8;
1876 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1887 rdev->config.cik.max_shader_engines = 1;
1888 rdev->config.cik.max_tile_pipes = 2;
1889 rdev->config.cik.max_cu_per_sh = 2;
1890 rdev->config.cik.max_sh_per_se = 1;
1891 rdev->config.cik.max_backends_per_se = 1;
1892 rdev->config.cik.max_texture_channel_caches = 2;
1893 rdev->config.cik.max_gprs = 256;
1894 rdev->config.cik.max_gs_threads = 16;
1895 rdev->config.cik.max_hw_contexts = 8;
1897 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
/* Zero 32 HDP register tuples at fixed MMIO offsets. */
1905 /* Initialize HDP */
1906 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907 WREG32((0x2c14 + j), 0x00000000);
1908 WREG32((0x2c18 + j), 0x00000000);
1909 WREG32((0x2c1c + j), 0x00000000);
1910 WREG32((0x2c20 + j), 0x00000000);
1911 WREG32((0x2c24 + j), 0x00000000);
1914 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
/* Enable frame-buffer reads/writes through the BIF. */
1916 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1918 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
/* Derive memory row size (KB) from the MC column count, capped at 4. */
1921 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922 rdev->config.cik.mem_max_burst_length_bytes = 256;
1923 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 if (rdev->config.cik.mem_row_size_in_kb > 4)
1926 rdev->config.cik.mem_row_size_in_kb = 4;
1927 /* XXX use MC settings? */
1928 rdev->config.cik.shader_engine_tile_size = 32;
1929 rdev->config.cik.num_gpus = 1;
1930 rdev->config.cik.multi_gpu_tile_size = 64;
/* Encode the computed row size into GB_ADDR_CONFIG (case labels lost;
 * presumably 1/2/4 KB map to ROW_SIZE 0/1/2 — verify against upstream). */
1932 /* fix up row size */
1933 gb_addr_config &= ~ROW_SIZE_MASK;
1934 switch (rdev->config.cik.mem_row_size_in_kb) {
1937 gb_addr_config |= ROW_SIZE(0);
1940 gb_addr_config |= ROW_SIZE(1);
1943 gb_addr_config |= ROW_SIZE(2);
1947 /* setup tiling info dword. gb_addr_config is not adequate since it does
1948 * not have bank info, so create a custom tiling dword.
1949 * bits 3:0 num_pipes
1950 * bits 7:4 num_banks
1951 * bits 11:8 group_size
1952 * bits 15:12 row_size
1954 rdev->config.cik.tile_config = 0;
1955 switch (rdev->config.cik.num_tile_pipes) {
1957 rdev->config.cik.tile_config |= (0 << 0);
1960 rdev->config.cik.tile_config |= (1 << 0);
1963 rdev->config.cik.tile_config |= (2 << 0);
1967 /* XXX what about 12? */
1968 rdev->config.cik.tile_config |= (3 << 0);
1971 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972 rdev->config.cik.tile_config |= 1 << 4;
1974 rdev->config.cik.tile_config |= 0 << 4;
1975 rdev->config.cik.tile_config |=
1976 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977 rdev->config.cik.tile_config |=
1978 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
/* Broadcast the address config to every block that needs it (HDP, DMIF,
 * both SDMA engines — low bits only, and the three UVD decoders). */
1980 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1989 cik_tiling_mode_table_init(rdev);
1991 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992 rdev->config.cik.max_sh_per_se,
1993 rdev->config.cik.max_backends_per_se);
1995 /* set HW defaults for 3D engine */
1996 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1998 WREG32(SX_DEBUG_1, 0x20);
2000 WREG32(TA_CNTL_AUX, 0x00010000);
2002 tmp = RREG32(SPI_CONFIG_CNTL);
2004 WREG32(SPI_CONFIG_CNTL, tmp);
2006 WREG32(SQ_CONFIG, 1);
2008 WREG32(DB_DEBUG, 0);
2010 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2012 WREG32(DB_DEBUG2, tmp);
2014 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2016 WREG32(DB_DEBUG3, tmp);
2018 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2020 WREG32(CB_HW_CONTROL, tmp);
2022 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
/* Scanline-converter FIFO sizes from the per-family config above. */
2024 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2029 WREG32(VGT_NUM_INSTANCES, 1);
2031 WREG32(CP_PERFMON_CNTL, 0);
2033 WREG32(SQ_CONFIG, 0);
2035 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 FORCE_EOV_MAX_REZ_CNT(255)));
2038 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2041 WREG32(VGT_GS_VERTEX_REUSE, 16);
2042 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
/* Make HDP flushes also invalidate its cache. */
2044 tmp = RREG32(HDP_MISC_CNTL);
2045 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046 WREG32(HDP_MISC_CNTL, tmp);
/* Read-modify-write with no modification: latches the current value. */
2048 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2051 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2058 * GPU scratch registers helpers function.
2061 * cik_scratch_init - setup driver info for CP scratch regs
2063 * @rdev: radeon_device pointer
2065 * Set up the number and offset of the CP scratch registers.
2066 * NOTE: use of CP scratch registers is a legacy inferface and
2067 * is not used by default on newer asics (r6xx+). On newer asics,
2068 * memory buffers are used for fences rather than scratch regs.
2070 static void cik_scratch_init(struct radeon_device *rdev)
2074 rdev->scratch.num_reg = 7;
2075 rdev->scratch.reg_base = SCRATCH_REG0;
2076 for (i = 0; i < rdev->scratch.num_reg; i++) {
2077 rdev->scratch.free[i] = true;
2078 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2083 * cik_ring_test - basic gfx ring test
2085 * @rdev: radeon_device pointer
2086 * @ring: radeon_ring structure holding ring information
2088 * Allocate a scratch register and write to it using the gfx ring (CIK).
2089 * Provides a basic gfx ring test to verify that the ring is working.
2090 * Used by cik_cp_gfx_resume();
2091 * Returns 0 on success, error on failure.
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2100 r = radeon_scratch_get(rdev, &scratch);
2102 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2105 WREG32(scratch, 0xCAFEDEAD);
2106 r = radeon_ring_lock(rdev, ring, 3);
2108 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109 radeon_scratch_free(rdev, scratch);
2112 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114 radeon_ring_write(ring, 0xDEADBEEF);
2115 radeon_ring_unlock_commit(rdev, ring);
2117 for (i = 0; i < rdev->usec_timeout; i++) {
2118 tmp = RREG32(scratch);
2119 if (tmp == 0xDEADBEEF)
2123 if (i < rdev->usec_timeout) {
2124 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2126 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 ring->idx, scratch, tmp);
2130 radeon_scratch_free(rdev, scratch);
2135 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2137 * @rdev: radeon_device pointer
2138 * @fence: radeon fence object
2140 * Emits a fence sequnce number on the gfx ring and flushes
/* cik_fence_gfx_ring_emit - emit the fence sequence number on the gfx
 * ring via EVENT_WRITE_EOP (flush + interrupt), then flush HDP with a
 * WRITE_DATA to HDP_MEM_COHERENCY_FLUSH_CNTL.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2143 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144 struct radeon_fence *fence)
2146 struct radeon_ring *ring = &rdev->ring[fence->ring];
/* GPU address where the fence value is written back */
2147 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2149 /* EVENT_WRITE_EOP - flush caches, send int */
2150 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2153 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* low address dword must be 4-byte aligned */
2155 radeon_ring_write(ring, addr & 0xfffffffc);
/* DATA_SEL(1): write 32-bit fence->seq; INT_SEL(2): raise interrupt */
2156 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 radeon_ring_write(ring, fence->seq);
2158 radeon_ring_write(ring, 0);
2160 /* We should be using the new WAIT_REG_MEM special op packet here
2161 * but it causes the CP to hang
/* HDP flush: write 0 to HDP_MEM_COHERENCY_FLUSH_CNTL (reg offset in dwords) */
2163 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165 WRITE_DATA_DST_SEL(0)));
2166 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167 radeon_ring_write(ring, 0);
2168 radeon_ring_write(ring, 0);
2172 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2174 * @rdev: radeon_device pointer
2175 * @fence: radeon fence object
2177 * Emits a fence sequnce number on the compute ring and flushes
/* cik_fence_compute_ring_emit - compute-ring counterpart of the gfx
 * fence emit; uses RELEASE_MEM instead of EVENT_WRITE_EOP, then the same
 * WRITE_DATA-based HDP flush.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2180 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181 struct radeon_fence *fence)
2183 struct radeon_ring *ring = &rdev->ring[fence->ring];
2184 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2186 /* RELEASE_MEM - flush caches, send int */
2187 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2190 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* note: RELEASE_MEM packet order differs from EVENT_WRITE_EOP —
 * data/int select dword precedes the address dwords */
2192 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193 radeon_ring_write(ring, addr & 0xfffffffc);
2194 radeon_ring_write(ring, upper_32_bits(addr));
2195 radeon_ring_write(ring, fence->seq);
2196 radeon_ring_write(ring, 0);
2198 /* We should be using the new WAIT_REG_MEM special op packet here
2199 * but it causes the CP to hang
2201 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203 WRITE_DATA_DST_SEL(0)));
2204 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205 radeon_ring_write(ring, 0);
2206 radeon_ring_write(ring, 0);
/* cik_semaphore_ring_emit - emit a MEM_SEMAPHORE packet that either
 * waits on or signals @semaphore, selected by emit_wait.
 * NOTE(review): extract is missing lines (braces, the emit_wait
 * parameter declaration line); code kept verbatim.
 */
2209 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210 struct radeon_ring *ring,
2211 struct radeon_semaphore *semaphore,
2214 uint64_t addr = semaphore->gpu_addr;
2215 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2217 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218 radeon_ring_write(ring, addr & 0xffffffff);
/* high 16 address bits share a dword with the wait/signal select */
2219 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2226 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2228 * @rdev: radeon_device pointer
2229 * @ib: radeon indirect buffer object
2231 * Emits an DE (drawing engine) or CE (constant engine) IB
2232 * on the gfx ring. IBs are usually generated by userspace
2233 * acceleration drivers and submitted to the kernel for
2234 * sheduling on the ring. This function schedules the IB
2235 * on the gfx ring for execution by the GPU.
/* cik_ring_ib_execute - schedule a DE or CE indirect buffer on the gfx
 * ring.  Const IBs get a SWITCH_BUFFER preamble and use the
 * INDIRECT_BUFFER_CONST opcode; DE IBs first record the next rptr
 * (either into rptr_save_reg or the writeback slot).
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2237 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2239 struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 u32 header, control = INDIRECT_BUFFER_VALID;
2242 if (ib->is_const_ib) {
2243 /* set switch buffer packet before const IB */
2244 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245 radeon_ring_write(ring, 0);
2247 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2250 if (ring->rptr_save_reg) {
/* +3 for this packet, +4 for the IB packet that follows */
2251 next_rptr = ring->wptr + 3 + 4;
2252 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253 radeon_ring_write(ring, ((ring->rptr_save_reg -
2254 PACKET3_SET_UCONFIG_REG_START) >> 2));
2255 radeon_ring_write(ring, next_rptr);
2256 } else if (rdev->wb.enabled) {
2257 next_rptr = ring->wptr + 5 + 4;
2258 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262 radeon_ring_write(ring, next_rptr);
2265 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* low 24 bits: IB length in dwords; bits 24+: VMID (0 = kernel) */
2268 control |= ib->length_dw |
2269 (ib->vm ? (ib->vm->id << 24) : 0);
2271 radeon_ring_write(ring, header);
2272 radeon_ring_write(ring,
2276 (ib->gpu_addr & 0xFFFFFFFC));
2277 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278 radeon_ring_write(ring, control);
2282 * cik_ib_test - basic gfx ring IB test
2284 * @rdev: radeon_device pointer
2285 * @ring: radeon_ring structure holding ring information
2287 * Allocate an IB and execute it on the gfx ring (CIK).
2288 * Provides a basic gfx ring test to verify that IBs are working.
2289 * Returns 0 on success, error on failure.
/* cik_ib_test - same idea as cik_ring_test but routed through an IB:
 * build a 3-dword IB that writes 0xDEADBEEF to a scratch reg, schedule
 * it, wait on its fence, then poll the scratch reg.
 * NOTE(review): extract is missing lines (error-path braces/returns);
 * code kept verbatim.
 */
2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2293 struct radeon_ib ib;
2299 r = radeon_scratch_get(rdev, &scratch);
2301 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2304 WREG32(scratch, 0xCAFEDEAD);
2305 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2307 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* the IB payload: SET_UCONFIG_REG scratch <- 0xDEADBEEF */
2310 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312 ib.ptr[2] = 0xDEADBEEF;
2314 r = radeon_ib_schedule(rdev, &ib, NULL);
/* scheduling failed: release both resources before erroring out */
2316 radeon_scratch_free(rdev, scratch);
2317 radeon_ib_free(rdev, &ib);
2318 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2321 r = radeon_fence_wait(ib.fence, false);
2323 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2326 for (i = 0; i < rdev->usec_timeout; i++) {
2327 tmp = RREG32(scratch);
2328 if (tmp == 0xDEADBEEF)
2332 if (i < rdev->usec_timeout) {
2333 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2335 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2339 radeon_scratch_free(rdev, scratch);
2340 radeon_ib_free(rdev, &ib);
2346 * On CIK, gfx and compute now have independant command processors.
2349 * Gfx consists of a single ring and can process both gfx jobs and
2350 * compute jobs. The gfx CP consists of three microengines (ME):
2351 * PFP - Pre-Fetch Parser
2353 * CE - Constant Engine
2354 * The PFP and ME make up what is considered the Drawing Engine (DE).
2355 * The CE is an asynchronous engine used for updating buffer desciptors
2356 * used by the DE so that they can be loaded into cache in parallel
2357 * while the DE is processing state update packets.
2360 * The compute CP consists of two microengines (ME):
2361 * MEC1 - Compute MicroEngine 1
2362 * MEC2 - Compute MicroEngine 2
2363 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364 * The queues are exposed to userspace and are programmed directly
2365 * by the compute runtime.
2368 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2370 * @rdev: radeon_device pointer
2371 * @enable: enable or disable the MEs
2373 * Halts or unhalts the gfx MEs.
/* cik_cp_gfx_enable - unhalt (enable=true) or halt (enable=false) the
 * three gfx CP microengines via CP_ME_CNTL; on halt, also mark the gfx
 * ring not ready.  NOTE(review): extract is missing the if/else braces;
 * code kept verbatim.
 */
2375 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2378 WREG32(CP_ME_CNTL, 0);
2380 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2387 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2389 * @rdev: radeon_device pointer
2391 * Loads the gfx PFP, ME, and CE ucode.
2392 * Returns 0 for success, -EINVAL if the ucode is not available.
/* cik_cp_gfx_load_microcode - stream the PFP, CE and ME firmware images
 * (big-endian dwords) into the CP ucode RAMs.  Each RAM's address
 * register is reset to 0 before and after the data writes.
 * NOTE(review): extract is missing lines (braces, returns); code kept
 * verbatim.
 */
2394 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2396 const __be32 *fw_data;
/* all three firmware blobs must have been fetched during init */
2399 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
/* engines must be halted while their ucode RAM is rewritten */
2402 cik_cp_gfx_enable(rdev, false);
2405 fw_data = (const __be32 *)rdev->pfp_fw->data;
2406 WREG32(CP_PFP_UCODE_ADDR, 0);
2407 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409 WREG32(CP_PFP_UCODE_ADDR, 0);
2412 fw_data = (const __be32 *)rdev->ce_fw->data;
2413 WREG32(CP_CE_UCODE_ADDR, 0);
2414 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416 WREG32(CP_CE_UCODE_ADDR, 0);
2419 fw_data = (const __be32 *)rdev->me_fw->data;
2420 WREG32(CP_ME_RAM_WADDR, 0);
2421 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423 WREG32(CP_ME_RAM_WADDR, 0);
/* leave all ucode address pointers zeroed */
2425 WREG32(CP_PFP_UCODE_ADDR, 0);
2426 WREG32(CP_CE_UCODE_ADDR, 0);
2427 WREG32(CP_ME_RAM_WADDR, 0);
2428 WREG32(CP_ME_RAM_RADDR, 0);
2433 * cik_cp_gfx_start - start the gfx ring
2435 * @rdev: radeon_device pointer
2437 * Enables the ring and loads the clear state context and other
2438 * packets required to init the ring.
2439 * Returns 0 for success, error for failure.
/* cik_cp_gfx_start - enable the gfx CP and emit the initialization
 * packet stream: CE partition setup, clear-state preamble, the
 * cik_default_state context dump, and a couple of context registers.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2441 static int cik_cp_gfx_start(struct radeon_device *rdev)
2443 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2447 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448 WREG32(CP_ENDIAN_SWAP, 0);
2449 WREG32(CP_DEVICE_ID, 1);
2451 cik_cp_gfx_enable(rdev, true);
/* +17 dwords of fixed packets around the default state dump */
2453 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2455 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2459 /* init the CE partitions. CE only used for gfx on CIK */
2460 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462 radeon_ring_write(ring, 0xc000);
2463 radeon_ring_write(ring, 0xc000);
2465 /* setup clear context state */
2466 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2469 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470 radeon_ring_write(ring, 0x80000000);
2471 radeon_ring_write(ring, 0x80000000);
/* replay the golden register state table */
2473 for (i = 0; i < cik_default_size; i++)
2474 radeon_ring_write(ring, cik_default_state[i]);
2476 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2479 /* set clear context state */
2480 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481 radeon_ring_write(ring, 0);
2483 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484 radeon_ring_write(ring, 0x00000316);
2485 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2488 radeon_ring_unlock_commit(rdev, ring);
2494 * cik_cp_gfx_fini - stop the gfx ring
2496 * @rdev: radeon_device pointer
2498 * Stop the gfx ring and tear down the driver ring
/* cik_cp_gfx_fini - halt the gfx CP engines, then tear down the driver's
 * gfx ring object. */
2501 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2503 cik_cp_gfx_enable(rdev, false);
2504 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2508 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2510 * @rdev: radeon_device pointer
2512 * Program the location and size of the gfx ring buffer
2513 * and test it to make sure it's working.
2514 * Returns 0 for success, error for failure.
/* cik_cp_gfx_resume - program the gfx ring buffer registers (size,
 * rptr/wptr, writeback address, base) and run a ring test.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2516 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2518 struct radeon_ring *ring;
2524 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2527 /* Set the write pointer delay */
2528 WREG32(CP_RB_WPTR_DELAY, 0);
2530 /* set the RB to use vmid 0 */
2531 WREG32(CP_RB_VMID, 0);
2533 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2535 /* ring 0 - compute and gfx */
2536 /* Set ring buffer size */
2537 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* register encodes log2 of the size in qwords */
2538 rb_bufsz = drm_order(ring->ring_size / 8);
2539 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2541 tmp |= BUF_SWAP_32BIT;
2543 WREG32(CP_RB0_CNTL, tmp);
2545 /* Initialize the ring buffer's read and write pointers */
2546 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2548 WREG32(CP_RB0_WPTR, ring->wptr);
2550 /* set the wb address wether it's enabled or not */
2551 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2554 /* scratch register shadowing is no longer supported */
2555 WREG32(SCRATCH_UMSK, 0);
2557 if (!rdev->wb.enabled)
2558 tmp |= RB_NO_UPDATE;
/* re-write CNTL without RB_RPTR_WR_ENA to latch the final config */
2561 WREG32(CP_RB0_CNTL, tmp);
2563 rb_addr = ring->gpu_addr >> 8;
2564 WREG32(CP_RB0_BASE, rb_addr);
2565 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2567 ring->rptr = RREG32(CP_RB0_RPTR);
2569 /* start the ring */
2570 cik_cp_gfx_start(rdev);
2571 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
/* mark ready optimistically; ring test below clears it on failure */
2572 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2574 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* cik_compute_ring_get_rptr - fetch the compute ring's read pointer:
 * from the writeback page when enabled, otherwise directly from
 * CP_HQD_PQ_RPTR under srbm_mutex with the queue's me/pipe/queue
 * selected.  NOTE(review): extract is missing lines; code kept verbatim.
 */
2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581 struct radeon_ring *ring)
2587 if (rdev->wb.enabled) {
2588 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2590 mutex_lock(&rdev->srbm_mutex);
2591 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2592 rptr = RREG32(CP_HQD_PQ_RPTR);
/* deselect: return SRBM to the default (0,0,0,0) state */
2593 cik_srbm_select(rdev, 0, 0, 0, 0);
2594 mutex_unlock(&rdev->srbm_mutex);
2596 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* cik_compute_ring_get_wptr - same pattern as get_rptr but for the
 * write pointer (writeback slot or CP_HQD_PQ_WPTR).
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2601 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2602 struct radeon_ring *ring)
2606 if (rdev->wb.enabled) {
2607 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2609 mutex_lock(&rdev->srbm_mutex);
2610 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2611 wptr = RREG32(CP_HQD_PQ_WPTR);
2612 cik_srbm_select(rdev, 0, 0, 0, 0);
2613 mutex_unlock(&rdev->srbm_mutex);
2615 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* cik_compute_ring_set_wptr - publish a new write pointer: mirror it
 * into the writeback slot and ring the queue's doorbell.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
2620 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2621 struct radeon_ring *ring)
2623 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2625 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2626 WDOORBELL32(ring->doorbell_offset, wptr);
2630 * cik_cp_compute_enable - enable/disable the compute CP MEs
2632 * @rdev: radeon_device pointer
2633 * @enable: enable or disable the MEs
2635 * Halts or unhalts the compute MEs.
/* cik_cp_compute_enable - unhalt (enable=true) or halt both compute
 * microengines via CP_MEC_CNTL.  NOTE(review): extract is missing the
 * if/else braces; code kept verbatim. */
2637 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2640 WREG32(CP_MEC_CNTL, 0);
2642 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2647 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2649 * @rdev: radeon_device pointer
2651 * Loads the compute MEC1&2 ucode.
2652 * Returns 0 for success, -EINVAL if the ucode is not available.
/* cik_cp_compute_load_microcode - stream the MEC firmware into MEC1's
 * ucode RAM; Kaveri additionally has a second MEC (MEC2) that gets the
 * same image.  NOTE(review): extract is missing lines (firmware-present
 * check, returns); code kept verbatim.
 */
2654 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2656 const __be32 *fw_data;
2662 cik_cp_compute_enable(rdev, false);
2665 fw_data = (const __be32 *)rdev->mec_fw->data;
2666 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2667 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2668 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2669 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2671 if (rdev->family == CHIP_KAVERI) {
/* MEC2 runs the same firmware image as MEC1 */
2673 fw_data = (const __be32 *)rdev->mec_fw->data;
2674 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2675 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2676 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2677 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2684 * cik_cp_compute_start - start the compute queues
2686 * @rdev: radeon_device pointer
2688 * Enable the compute queues.
2689 * Returns 0 for success, error for failure.
/* cik_cp_compute_start - unhalt the compute MEs; queue programming is
 * done elsewhere (cik_cp_compute_resume). */
2691 static int cik_cp_compute_start(struct radeon_device *rdev)
2693 cik_cp_compute_enable(rdev, true);
2699 * cik_cp_compute_fini - stop the compute queues
2701 * @rdev: radeon_device pointer
2703 * Stop the compute queues and tear down the driver queue
/* cik_cp_compute_fini - halt the compute MEs and release the MQD buffer
 * object of each of the two compute rings (CP1/CP2).
 * NOTE(review): extract is missing lines (loop if/else selecting idx);
 * code kept verbatim.
 */
2706 static void cik_cp_compute_fini(struct radeon_device *rdev)
2710 cik_cp_compute_enable(rdev, false);
2712 for (i = 0; i < 2; i++) {
2714 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2716 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2718 if (rdev->ring[idx].mqd_obj) {
2719 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2720 if (unlikely(r != 0))
2721 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2723 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2724 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2726 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
/* clear the pointer so a later fini/resume doesn't double-free */
2727 rdev->ring[idx].mqd_obj = NULL;
/* cik_mec_fini - unpin and free the HPD EOP buffer object allocated by
 * cik_mec_init.  NOTE(review): extract is missing lines; code kept
 * verbatim.
 */
2732 static void cik_mec_fini(struct radeon_device *rdev)
2736 if (rdev->mec.hpd_eop_obj) {
2737 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2738 if (unlikely(r != 0))
2739 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2740 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2741 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2743 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2744 rdev->mec.hpd_eop_obj = NULL;
2748 #define MEC_HPD_SIZE 2048
/* cik_mec_init - size the MEC topology (Kaveri: 2 MECs, others: 1; 4
 * pipes per MEC, 8 queues per pipe), allocate/pin/map a GTT buffer for
 * the per-pipe HPD EOP areas, and zero it.
 * NOTE(review): extract is missing lines (error returns, cleanup
 * calls); code kept verbatim.
 */
2750 static int cik_mec_init(struct radeon_device *rdev)
2756 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2757 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2759 if (rdev->family == CHIP_KAVERI)
2760 rdev->mec.num_mec = 2;
2762 rdev->mec.num_mec = 1;
2763 rdev->mec.num_pipe = 4;
2764 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2766 if (rdev->mec.hpd_eop_obj == NULL) {
/* one MEC_HPD_SIZE*2 region per (mec, pipe) pair */
2767 r = radeon_bo_create(rdev,
2768 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2770 RADEON_GEM_DOMAIN_GTT, NULL,
2771 &rdev->mec.hpd_eop_obj);
2773 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2778 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2779 if (unlikely(r != 0)) {
2783 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2784 &rdev->mec.hpd_eop_gpu_addr);
2786 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2790 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2792 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2797 /* clear memory. Not sure if this is required or not */
2798 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2800 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2801 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* struct hqd_registers - CPU-side shadow of the per-queue CP_HQD_*
 * hardware register block, embedded in the MQD (memory queue
 * descriptor).  Field names mirror the register names.
 * NOTE(review): extract is missing lines (several fields, e.g. the
 * vmid/active/rptr/wptr shadows referenced by cik_cp_compute_resume,
 * plus the braces); code kept verbatim.
 */
2806 struct hqd_registers
2808 u32 cp_mqd_base_addr;
2809 u32 cp_mqd_base_addr_hi;
2812 u32 cp_hqd_persistent_state;
2813 u32 cp_hqd_pipe_priority;
2814 u32 cp_hqd_queue_priority;
2817 u32 cp_hqd_pq_base_hi;
2819 u32 cp_hqd_pq_rptr_report_addr;
2820 u32 cp_hqd_pq_rptr_report_addr_hi;
2821 u32 cp_hqd_pq_wptr_poll_addr;
2822 u32 cp_hqd_pq_wptr_poll_addr_hi;
2823 u32 cp_hqd_pq_doorbell_control;
2825 u32 cp_hqd_pq_control;
2826 u32 cp_hqd_ib_base_addr;
2827 u32 cp_hqd_ib_base_addr_hi;
2829 u32 cp_hqd_ib_control;
2830 u32 cp_hqd_iq_timer;
2832 u32 cp_hqd_dequeue_request;
2833 u32 cp_hqd_dma_offload;
2834 u32 cp_hqd_sema_cmd;
2835 u32 cp_hqd_msg_type;
2836 u32 cp_hqd_atomic0_preop_lo;
2837 u32 cp_hqd_atomic0_preop_hi;
2838 u32 cp_hqd_atomic1_preop_lo;
2839 u32 cp_hqd_atomic1_preop_hi;
2840 u32 cp_hqd_hq_scheduler0;
2841 u32 cp_hqd_hq_scheduler1;
/* Fields of the Bonaire MQD (memory queue descriptor) — the structure
 * the CP reads queue state from; it embeds the hqd_registers shadow
 * above.  NOTE(review): the "struct bonaire_mqd {" header line and
 * several members were dropped by the extraction (original lines
 * 2842-2847 etc.); fields kept verbatim.
 */
2848 u32 dispatch_initiator;
2852 u32 pipeline_stat_enable;
2853 u32 perf_counter_enable;
2859 u32 resource_limits;
2860 u32 static_thread_mgmt01[2];
2862 u32 static_thread_mgmt23[2];
2864 u32 thread_trace_enable;
2867 u32 vgtcs_invoke_count[2];
2868 struct hqd_registers queue_state;
2870 u32 interrupt_queue[64];
2874 * cik_cp_compute_resume - setup the compute queue registers
2876 * @rdev: radeon_device pointer
2878 * Program the compute queues and test them to make sure they
2880 * Returns 0 for success, error for failure.
/* cik_cp_compute_resume - bring up the compute queues: start the MECs,
 * program the per-pipe EOP areas, then for each of the two kernel
 * compute rings allocate/map an MQD, fill it in while the queue is
 * selected through SRBM, and activate the queue.  Register writes are
 * order-sensitive; code kept byte-for-byte.
 * NOTE(review): this extract is missing lines throughout (declarations,
 * braces, returns); do not treat the visible lines as a complete body.
 */
2882 static int cik_cp_compute_resume(struct radeon_device *rdev)
2886 bool use_doorbell = true;
2892 struct bonaire_mqd *mqd;
2894 r = cik_cp_compute_start(rdev);
2898 /* fix up chicken bits */
2899 tmp = RREG32(CP_CPF_DEBUG);
2901 WREG32(CP_CPF_DEBUG, tmp);
2903 /* init the pipes */
2904 mutex_lock(&rdev->srbm_mutex);
2905 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
/* pipes 0-3 belong to MEC1, 4-7 to MEC2 */
2906 int me = (i < 4) ? 1 : 2;
2907 int pipe = (i < 4) ? i : (i - 4);
2909 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2911 cik_srbm_select(rdev, me, pipe, 0, 0);
2913 /* write the EOP addr */
2914 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2915 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2917 /* set the VMID assigned */
2918 WREG32(CP_HPD_EOP_VMID, 0);
2920 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2921 tmp = RREG32(CP_HPD_EOP_CONTROL);
2922 tmp &= ~EOP_SIZE_MASK;
2923 tmp |= drm_order(MEC_HPD_SIZE / 8);
2924 WREG32(CP_HPD_EOP_CONTROL, tmp);
2926 cik_srbm_select(rdev, 0, 0, 0, 0);
2927 mutex_unlock(&rdev->srbm_mutex);
2929 /* init the queues. Just two for now. */
2930 for (i = 0; i < 2; i++) {
2932 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2934 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2936 if (rdev->ring[idx].mqd_obj == NULL) {
2937 r = radeon_bo_create(rdev,
2938 sizeof(struct bonaire_mqd),
2940 RADEON_GEM_DOMAIN_GTT, NULL,
2941 &rdev->ring[idx].mqd_obj);
2943 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2948 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2949 if (unlikely(r != 0)) {
2950 cik_cp_compute_fini(rdev);
2953 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2956 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2957 cik_cp_compute_fini(rdev);
2960 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2962 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2963 cik_cp_compute_fini(rdev);
2967 /* doorbell offset */
2968 rdev->ring[idx].doorbell_offset =
2969 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2971 /* init the mqd struct */
2972 memset(buf, 0, sizeof(struct bonaire_mqd));
2974 mqd = (struct bonaire_mqd *)buf;
2975 mqd->header = 0xC0310800;
/* enable all compute units for this queue */
2976 mqd->static_thread_mgmt01[0] = 0xffffffff;
2977 mqd->static_thread_mgmt01[1] = 0xffffffff;
2978 mqd->static_thread_mgmt23[0] = 0xffffffff;
2979 mqd->static_thread_mgmt23[1] = 0xffffffff;
2981 mutex_lock(&rdev->srbm_mutex);
2982 cik_srbm_select(rdev, rdev->ring[idx].me,
2983 rdev->ring[idx].pipe,
2984 rdev->ring[idx].queue, 0);
2986 /* disable wptr polling */
2987 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2988 tmp &= ~WPTR_POLL_EN;
2989 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2991 /* enable doorbell? */
2992 mqd->queue_state.cp_hqd_pq_doorbell_control =
2993 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2995 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2997 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2998 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2999 mqd->queue_state.cp_hqd_pq_doorbell_control);
3001 /* disable the queue if it's active */
3002 mqd->queue_state.cp_hqd_dequeue_request = 0;
3003 mqd->queue_state.cp_hqd_pq_rptr = 0;
3004 mqd->queue_state.cp_hqd_pq_wptr= 0;
3005 if (RREG32(CP_HQD_ACTIVE) & 1) {
3006 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* wait for the HQD to drain and go inactive */
3007 for (i = 0; i < rdev->usec_timeout; i++) {
3008 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3012 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3013 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3014 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3017 /* set the pointer to the MQD */
3018 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3019 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3020 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3021 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3022 /* set MQD vmid to 0 */
3023 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3024 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3025 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3027 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3028 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3029 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3030 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3031 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3032 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3034 /* set up the HQD, this is similar to CP_RB0_CNTL */
3035 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3036 mqd->queue_state.cp_hqd_pq_control &=
3037 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3039 mqd->queue_state.cp_hqd_pq_control |=
3040 drm_order(rdev->ring[idx].ring_size / 8);
3041 mqd->queue_state.cp_hqd_pq_control |=
3042 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3044 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3046 mqd->queue_state.cp_hqd_pq_control &=
3047 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3048 mqd->queue_state.cp_hqd_pq_control |=
3049 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3050 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3052 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3054 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3056 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3057 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3058 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3059 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3060 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3061 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3063 /* set the wb address wether it's enabled or not */
3065 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3067 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3068 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3070 upper_32_bits(wb_gpu_addr) & 0xffff;
3071 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3072 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3073 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3074 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3076 /* enable the doorbell if requested */
3078 mqd->queue_state.cp_hqd_pq_doorbell_control =
3079 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3080 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3081 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3082 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3083 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3084 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3085 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3088 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3090 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3091 mqd->queue_state.cp_hqd_pq_doorbell_control);
3093 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3094 rdev->ring[idx].wptr = 0;
3095 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3096 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3097 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3098 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3100 /* set the vmid for the queue */
3101 mqd->queue_state.cp_hqd_vmid = 0;
3102 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3104 /* activate the queue */
3105 mqd->queue_state.cp_hqd_active = 1;
3106 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3108 cik_srbm_select(rdev, 0, 0, 0, 0);
3109 mutex_unlock(&rdev->srbm_mutex);
3111 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3112 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* ring test clears ready on failure, as in cik_cp_gfx_resume */
3114 rdev->ring[idx].ready = true;
3115 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3117 rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute CPs. */
3123 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3125 cik_cp_gfx_enable(rdev, enable);
3126 cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load gfx then compute CP firmware.
 * NOTE(review): extract is missing the error-check/return lines; code
 * kept verbatim. */
3129 static int cik_cp_load_microcode(struct radeon_device *rdev)
3133 r = cik_cp_gfx_load_microcode(rdev);
3136 r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute CPs. */
3143 static void cik_cp_fini(struct radeon_device *rdev)
3145 cik_cp_gfx_fini(rdev);
3146 cik_cp_compute_fini(rdev);
/* cik_cp_resume - soft-reset all CP blocks, load firmware, then resume
 * the gfx and compute rings.  NOTE(review): extract is missing lines
 * (mdelay between reset writes, error returns); code kept verbatim.
 */
3149 static int cik_cp_resume(struct radeon_device *rdev)
3153 /* Reset all cp blocks */
3154 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
/* reads after the writes flush/post the reset toggles */
3155 RREG32(GRBM_SOFT_RESET);
3157 WREG32(GRBM_SOFT_RESET, 0);
3158 RREG32(GRBM_SOFT_RESET);
3160 r = cik_cp_load_microcode(rdev);
3164 r = cik_cp_gfx_resume(rdev);
3167 r = cik_cp_compute_resume(rdev);
3176 * Starting with CIK, the GPU has new asynchronous
3177 * DMA engines. These engines are used for compute
3178 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3179 * and each one supports 1 ring buffer used for gfx
3180 * and 2 queues used for compute.
3182 * The programming model is very similar to the CP
3183 * (ring buffer, IBs, etc.), but sDMA has it's own
3184 * packet format that is different from the PM4 format
3185 * used by the CP. sDMA supports copying data, writing
3186 * embedded data, solid fills, and a number of other
3187 * things. It also has support for tiling/detiling of
3191 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3193 * @rdev: radeon_device pointer
3194 * @ib: IB object to schedule
3196 * Schedule an IB in the DMA ring (CIK).
/* cik_sdma_ring_ib_execute - schedule an IB on an sDMA ring: optionally
 * record next_rptr via an SDMA WRITE packet, NOP-pad so the
 * INDIRECT_BUFFER packet ends on an 8-dword boundary, then emit it.
 * NOTE(review): extract is missing lines (e.g. the NOP write inside the
 * first alignment loop, closing braces); code kept verbatim.
 */
3198 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3199 struct radeon_ib *ib)
3201 struct radeon_ring *ring = &rdev->ring[ib->ring];
/* sDMA carries the VMID in the packet's extra bits (4 bits) */
3202 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3204 if (rdev->wb.enabled) {
3205 u32 next_rptr = ring->wptr + 5;
3206 while ((next_rptr & 7) != 4)
3209 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3210 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3211 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3212 radeon_ring_write(ring, 1); /* number of DWs to follow */
3213 radeon_ring_write(ring, next_rptr);
3216 /* IB packet must end on a 8 DW boundary */
3217 while ((ring->wptr & 7) != 4)
3218 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3219 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3220 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3221 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3222 radeon_ring_write(ring, ib->length_dw);
3227 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3229 * @rdev: radeon_device pointer
3230 * @fence: radeon fence object
3232 * Add a DMA fence packet to the ring to write
3233 * the fence seq number and DMA trap packet to generate
3234 * an interrupt if needed (CIK).
/* cik_sdma_fence_ring_emit - write the fence value with an SDMA FENCE
 * packet, raise an interrupt via TRAP, then flush HDP with a
 * POLL_REG_MEM on the GPU_HDP_FLUSH request/done registers, using the
 * per-engine SDMA0/SDMA1 ref/mask.
 * NOTE(review): extract is missing lines; code kept verbatim.
 */
3236 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3237 struct radeon_fence *fence)
3239 struct radeon_ring *ring = &rdev->ring[fence->ring];
3240 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3241 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3242 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3245 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3246 ref_and_mask = SDMA0;
3248 ref_and_mask = SDMA1;
3250 /* write the fence */
3251 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3252 radeon_ring_write(ring, addr & 0xffffffff);
3253 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3254 radeon_ring_write(ring, fence->seq);
3255 /* generate an interrupt */
3256 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
/* poll until (GPU_HDP_FLUSH_DONE & mask) == ref, i.e. flush complete */
3258 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3259 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3260 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3261 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3262 radeon_ring_write(ring, ref_and_mask); /* MASK */
3263 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3267 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3269 * @rdev: radeon_device pointer
3270 * @ring: radeon_ring structure holding ring information
3271 * @semaphore: radeon semaphore object
3272 * @emit_wait: wait or signal semaphore
3274 * Add a DMA semaphore packet to the ring wait on or signal
3275 * other rings (CIK).
/* cik_sdma_semaphore_ring_emit - emit an SDMA SEMAPHORE packet; the S
 * (signal) extra bit is set when NOT waiting.  Address must be 8-byte
 * aligned.  NOTE(review): extract is missing lines (emit_wait param
 * line, braces); code kept verbatim.
 */
3277 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3278 struct radeon_ring *ring,
3279 struct radeon_semaphore *semaphore,
3282 u64 addr = semaphore->gpu_addr;
3283 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3285 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3286 radeon_ring_write(ring, addr & 0xfffffff8);
3287 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3291 * cik_sdma_gfx_stop - stop the gfx async dma engines
3293 * @rdev: radeon_device pointer
3295 * Stop the gfx async dma ring buffers (CIK).
/* cik_sdma_gfx_stop - disable the ring buffer and IB fetch on both sDMA
 * engines (SDMA0/SDMA1) and restore the full visible VRAM size for TTM.
 * NOTE(review): extract is missing the if/else selecting reg_offset;
 * code kept verbatim.
 */
3297 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3299 u32 rb_cntl, reg_offset;
3302 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3304 for (i = 0; i < 2; i++) {
3306 reg_offset = SDMA0_REGISTER_OFFSET;
3308 reg_offset = SDMA1_REGISTER_OFFSET;
3309 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3310 rb_cntl &= ~SDMA_RB_ENABLE;
3311 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3312 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3317 * cik_sdma_rlc_stop - stop the compute async dma engines
3319 * @rdev: radeon_device pointer
3321 * Stop the compute async dma queues (CIK).
3323 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3329 * cik_sdma_enable - stop the async dma engines
3331 * @rdev: radeon_device pointer
3332 * @enable: enable/disable the DMA MEs.
3334 * Halt or unhalt the async dma engines (CIK).
/* Halt (enable == false) or un-halt (enable == true) both SDMA micro engines. */
3336 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3338 u32 me_cntl, reg_offset;
/* one pass per SDMA instance (SDMA0, SDMA1) */
3341 for (i = 0; i < 2; i++) {
3343 reg_offset = SDMA0_REGISTER_OFFSET;
3345 reg_offset = SDMA1_REGISTER_OFFSET;
3346 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
/* clearing SDMA_HALT lets the engine run; setting it halts the engine */
3348 me_cntl &= ~SDMA_HALT;
3350 me_cntl |= SDMA_HALT;
3351 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3356 * cik_sdma_gfx_resume - setup and start the async dma engines
3358 * @rdev: radeon_device pointer
3360 * Set up the gfx DMA ring buffers and enable them (CIK).
3361 * Returns 0 for success, error for failure.
/* Program and start the gfx ring buffer of each SDMA instance, then ring-test it. */
3363 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3365 struct radeon_ring *ring;
3366 u32 rb_cntl, ib_cntl;
3368 u32 reg_offset, wb_offset;
/* one pass per SDMA instance; instance 0 drives the DMA ring, instance 1 the DMA1 ring */
3371 for (i = 0; i < 2; i++) {
3373 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3374 reg_offset = SDMA0_REGISTER_OFFSET;
3375 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3377 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3378 reg_offset = SDMA1_REGISTER_OFFSET;
3379 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
/* disable semaphore timeout behavior */
3382 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3383 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3385 /* Set ring buffer size in dwords */
3386 rb_bufsz = drm_order(ring->ring_size / 4);
/* ring size field starts at bit 1 of RB_CNTL */
3387 rb_cntl = rb_bufsz << 1;
/* big-endian hosts need the HW to byte-swap ring and rptr-writeback data */
3389 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3391 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3393 /* Initialize the ring buffer's read and write pointers */
3394 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3395 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3397 /* set the wb address whether it's enabled or not */
3398 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3399 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3400 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3401 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3403 if (rdev->wb.enabled)
3404 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
/* ring base is in units of 256 bytes */
3406 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3407 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
/* HW wptr is in bytes; ring->wptr is in dwords */
3410 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3412 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
/* only now enable the ring buffer */
3415 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3417 ib_cntl = SDMA_IB_ENABLE;
3419 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3421 /* enable DMA IBs */
3422 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
/* sanity check: mark the ring unusable if the test fails */
3426 r = radeon_ring_test(rdev, ring->idx, ring);
3428 ring->ready = false;
/* DMA is up: restrict TTM to the real VRAM size */
3433 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3439 * cik_sdma_rlc_resume - setup and start the async dma engines
3441 * @rdev: radeon_device pointer
3443 * Set up the compute DMA queues and enable them (CIK).
3444 * Returns 0 for success, error for failure.
3446 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3453 * cik_sdma_load_microcode - load the sDMA ME ucode
3455 * @rdev: radeon_device pointer
3457 * Loads the sDMA0/1 ucode.
3458 * Returns 0 for success, -EINVAL if the ucode is not available.
/* Upload the sDMA firmware image into both SDMA instances' ucode RAM. */
3460 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3462 const __be32 *fw_data;
3468 /* stop the gfx rings and rlc compute queues */
3469 cik_sdma_gfx_stop(rdev)
3470 cik_sdma_rlc_stop(rdev);
/* engines must be halted before touching ucode RAM */
3473 cik_sdma_enable(rdev, false);
/* sdma0: firmware words are big-endian in the blob */
3476 fw_data = (const __be32 *)rdev->sdma_fw->data;
3477 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3478 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3479 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
/* trailing word carries the expected ucode version */
3480 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
/* sdma1: same image, second instance */
3483 fw_data = (const __be32 *)rdev->sdma_fw->data;
3484 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3485 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3486 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3487 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
/* reset the ucode address registers when done */
3489 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3490 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3495 * cik_sdma_resume - setup and start the async dma engines
3497 * @rdev: radeon_device pointer
3499 * Set up the DMA engines and enable them (CIK).
3500 * Returns 0 for success, error for failure.
/* Full SDMA bring-up: soft-reset both engines, load ucode, unhalt, start queues. */
3502 static int cik_sdma_resume(struct radeon_device *rdev)
/* pulse the SDMA soft-reset bits; reads flush the writes */
3507 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3508 RREG32(SRBM_SOFT_RESET);
3510 WREG32(SRBM_SOFT_RESET, 0);
3511 RREG32(SRBM_SOFT_RESET);
3513 r = cik_sdma_load_microcode(rdev);
3517 /* unhalt the MEs */
3518 cik_sdma_enable(rdev, true);
3520 /* start the gfx rings and rlc compute queues */
3521 r = cik_sdma_gfx_resume(rdev);
3524 r = cik_sdma_rlc_resume(rdev);
3532 * cik_sdma_fini - tear down the async dma engines
3534 * @rdev: radeon_device pointer
3536 * Stop the async dma engines and free the rings (CIK).
/* Tear down the SDMA engines: stop queues, halt the MEs, free both DMA rings. */
3538 static void cik_sdma_fini(struct radeon_device *rdev)
3540 /* stop the gfx rings and rlc compute queues */
3541 cik_sdma_gfx_stop(rdev);
3542 cik_sdma_rlc_stop(rdev);
/* halt the micro engines before freeing ring memory */
3544 cik_sdma_enable(rdev, false);
3545 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3546 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3547 /* XXX - compute dma queue tear down */
3551 * cik_copy_dma - copy pages using the DMA engine
3553 * @rdev: radeon_device pointer
3554 * @src_offset: src GPU address
3555 * @dst_offset: dst GPU address
3556 * @num_gpu_pages: number of GPU pages to xfer
3557 * @fence: radeon fence object
3559 * Copy GPU paging using the DMA engine (CIK).
3560 * Used by the radeon ttm implementation to move pages if
3561 * registered as the asic copy callback.
/* Copy num_gpu_pages pages from src_offset to dst_offset using the sDMA
 * COPY/LINEAR packet, chunked at 0x1fffff bytes per packet, and emit a fence. */
3563 int cik_copy_dma(struct radeon_device *rdev,
3564 uint64_t src_offset, uint64_t dst_offset,
3565 unsigned num_gpu_pages,
3566 struct radeon_fence **fence)
3568 struct radeon_semaphore *sem = NULL;
3569 int ring_index = rdev->asic->copy.dma_ring_index;
3570 struct radeon_ring *ring = &rdev->ring[ring_index];
3571 u32 size_in_bytes, cur_size_in_bytes;
/* semaphore is used to synchronize with the fence's ring if needed */
3575 r = radeon_semaphore_create(rdev, &sem);
3577 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
/* each COPY packet moves at most 0x1fffff bytes; 7 dwords per packet */
3582 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3583 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3585 DRM_ERROR("radeon: moving bo (%d).\n", r);
3586 radeon_semaphore_free(rdev, &sem, NULL);
/* wait for the previous fence's ring before copying, if it differs */
3590 if (radeon_fence_need_sync(*fence, ring->idx)) {
3591 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3593 radeon_fence_note_sync(*fence, ring->idx);
3595 radeon_semaphore_free(rdev, &sem, NULL);
3598 for (i = 0; i < num_loops; i++) {
3599 cur_size_in_bytes = size_in_bytes;
3600 if (cur_size_in_bytes > 0x1fffff)
3601 cur_size_in_bytes = 0x1fffff;
3602 size_in_bytes -= cur_size_in_bytes;
3603 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3604 radeon_ring_write(ring, cur_size_in_bytes);
3605 radeon_ring_write(ring, 0); /* src/dst endian swap */
/* NOTE(review): src low word is not dword-masked (0xffffffff) while dst uses
 * 0xfffffffc — confirm against the SDMA packet spec that this is intended */
3606 radeon_ring_write(ring, src_offset & 0xffffffff);
3607 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3608 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3609 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3610 src_offset += cur_size_in_bytes;
3611 dst_offset += cur_size_in_bytes;
/* emit the fence; on failure undo the ring writes */
3614 r = radeon_fence_emit(rdev, fence, ring->idx);
3616 radeon_ring_unlock_undo(rdev, ring);
3620 radeon_ring_unlock_commit(rdev, ring);
3621 radeon_semaphore_free(rdev, &sem, *fence);
3627 * cik_sdma_ring_test - simple async dma engine test
3629 * @rdev: radeon_device pointer
3630 * @ring: radeon_ring structure holding ring information
3632 * Test the DMA engine by using it to write a
3633 * value to memory (CIK).
3634 * Returns 0 for success, error for failure.
/* Smoke-test an sDMA ring: write 0xDEADBEEF to VRAM scratch via a WRITE/LINEAR
 * packet and poll until the value lands or the usec timeout expires. */
3636 int cik_sdma_ring_test(struct radeon_device *rdev,
3637 struct radeon_ring *ring)
3641 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3645 DRM_ERROR("invalid vram scratch pointer\n");
/* 4 dwords: header, addr lo, addr hi is split below — packet body plus payload */
3652 r = radeon_ring_lock(rdev, ring, 4);
3654 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3657 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3658 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3659 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3660 radeon_ring_write(ring, 1); /* number of DWs to follow */
3661 radeon_ring_write(ring, 0xDEADBEEF);
3662 radeon_ring_unlock_commit(rdev, ring);
/* poll the scratch location for the magic value */
3664 for (i = 0; i < rdev->usec_timeout; i++) {
3666 if (tmp == 0xDEADBEEF)
/* i < timeout means the value was observed before we gave up */
3671 if (i < rdev->usec_timeout) {
3672 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3674 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3682 * cik_sdma_ib_test - test an IB on the DMA engine
3684 * @rdev: radeon_device pointer
3685 * @ring: radeon_ring structure holding ring information
3687 * Test a simple IB in the DMA ring (CIK).
3688 * Returns 0 on success, error on failure.
/* Test sDMA indirect buffers: build a tiny IB that writes 0xDEADBEEF to VRAM
 * scratch, schedule it, wait on its fence, then poll for the value. */
3690 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3692 struct radeon_ib ib;
3695 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3699 DRM_ERROR("invalid vram scratch pointer\n");
3706 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3708 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* same WRITE/LINEAR packet as the ring test, but placed in an IB */
3712 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3713 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3714 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3716 ib.ptr[4] = 0xDEADBEEF;
3719 r = radeon_ib_schedule(rdev, &ib, NULL);
3721 radeon_ib_free(rdev, &ib);
3722 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* block until the IB's fence signals before checking memory */
3725 r = radeon_fence_wait(ib.fence, false);
3727 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3730 for (i = 0; i < rdev->usec_timeout; i++) {
3732 if (tmp == 0xDEADBEEF)
3736 if (i < rdev->usec_timeout) {
3737 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3739 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3742 radeon_ib_free(rdev, &ib);
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used before
 * and after a soft reset to aid hang diagnosis. Read-only, no side effects. */
3747 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3749 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3750 RREG32(GRBM_STATUS));
3751 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3752 RREG32(GRBM_STATUS2));
3753 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3754 RREG32(GRBM_STATUS_SE0));
3755 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3756 RREG32(GRBM_STATUS_SE1));
3757 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3758 RREG32(GRBM_STATUS_SE2));
3759 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3760 RREG32(GRBM_STATUS_SE3));
3761 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3762 RREG32(SRBM_STATUS));
3763 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3764 RREG32(SRBM_STATUS2));
3765 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3766 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3767 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3768 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3769 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3770 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3771 RREG32(CP_STALLED_STAT1));
3772 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3773 RREG32(CP_STALLED_STAT2));
3774 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3775 RREG32(CP_STALLED_STAT3));
3776 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3777 RREG32(CP_CPF_BUSY_STAT));
3778 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3779 RREG32(CP_CPF_STALLED_STAT1));
3780 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3781 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3782 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3783 RREG32(CP_CPC_STALLED_STAT1));
3784 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3788 * cik_gpu_check_soft_reset - check which blocks are busy
3790 * @rdev: radeon_device pointer
3792 * Check which blocks are busy and return the relevant reset
3793 * mask to be used by cik_gpu_soft_reset().
3794 * Returns a mask of the blocks to be reset.
/* Inspect the status registers and build a RADEON_RESET_* mask of hung blocks
 * for cik_gpu_soft_reset(). MC busy is deliberately not treated as hung. */
3796 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3802 tmp = RREG32(GRBM_STATUS);
3803 if (tmp & (PA_BUSY | SC_BUSY |
3804 BCI_BUSY | SX_BUSY |
3805 TA_BUSY | VGT_BUSY |
3807 GDS_BUSY | SPI_BUSY |
3808 IA_BUSY | IA_BUSY_NO_DMA))
3809 reset_mask |= RADEON_RESET_GFX;
3811 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3812 reset_mask |= RADEON_RESET_CP;
3815 tmp = RREG32(GRBM_STATUS2);
3817 reset_mask |= RADEON_RESET_RLC;
3819 /* SDMA0_STATUS_REG */
3820 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3821 if (!(tmp & SDMA_IDLE))
3822 reset_mask |= RADEON_RESET_DMA;
3824 /* SDMA1_STATUS_REG */
3825 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3826 if (!(tmp & SDMA_IDLE))
3827 reset_mask |= RADEON_RESET_DMA1;
3830 tmp = RREG32(SRBM_STATUS2);
3831 if (tmp & SDMA_BUSY)
3832 reset_mask |= RADEON_RESET_DMA;
3834 if (tmp & SDMA1_BUSY)
3835 reset_mask |= RADEON_RESET_DMA1;
3838 tmp = RREG32(SRBM_STATUS);
3841 reset_mask |= RADEON_RESET_IH;
3844 reset_mask |= RADEON_RESET_SEM;
3846 if (tmp & GRBM_RQ_PENDING)
3847 reset_mask |= RADEON_RESET_GRBM;
3850 reset_mask |= RADEON_RESET_VMC;
3852 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3853 MCC_BUSY | MCD_BUSY))
3854 reset_mask |= RADEON_RESET_MC;
3856 if (evergreen_is_display_hung(rdev))
3857 reset_mask |= RADEON_RESET_DISPLAY;
3859 /* Skip MC reset as it's most likely not hung, just busy */
3860 if (reset_mask & RADEON_RESET_MC) {
3861 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3862 reset_mask &= ~RADEON_RESET_MC;
3869 * cik_gpu_soft_reset - soft reset GPU
3871 * @rdev: radeon_device pointer
3872 * @reset_mask: mask of which blocks to reset
3874 * Soft reset the blocks specified in @reset_mask.
/* Soft reset the blocks named in @reset_mask: halt CP/MEC/SDMA, stop the MC,
 * pulse the matching GRBM/SRBM soft-reset bits, then resume the MC. */
3876 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3878 struct evergreen_mc_save save;
3879 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3882 if (reset_mask == 0)
3885 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3887 cik_print_gpu_status_regs(rdev);
3888 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3889 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3890 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3891 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3896 /* Disable GFX parsing/prefetching */
3897 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3899 /* Disable MEC parsing/prefetching */
3900 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* halt whichever SDMA instances we are about to reset */
3902 if (reset_mask & RADEON_RESET_DMA) {
3904 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3906 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3908 if (reset_mask & RADEON_RESET_DMA1) {
3910 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3912 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* quiesce the memory controller before pulsing resets */
3915 evergreen_mc_stop(rdev, &save);
3916 if (evergreen_mc_wait_for_idle(rdev)) {
3917 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
3920 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3921 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3923 if (reset_mask & RADEON_RESET_CP) {
3924 grbm_soft_reset |= SOFT_RESET_CP;
3926 srbm_soft_reset |= SOFT_RESET_GRBM;
3929 if (reset_mask & RADEON_RESET_DMA)
3930 srbm_soft_reset |= SOFT_RESET_SDMA;
3932 if (reset_mask & RADEON_RESET_DMA1)
3933 srbm_soft_reset |= SOFT_RESET_SDMA1;
3935 if (reset_mask & RADEON_RESET_DISPLAY)
3936 srbm_soft_reset |= SOFT_RESET_DC;
3938 if (reset_mask & RADEON_RESET_RLC)
3939 grbm_soft_reset |= SOFT_RESET_RLC;
3941 if (reset_mask & RADEON_RESET_SEM)
3942 srbm_soft_reset |= SOFT_RESET_SEM;
3944 if (reset_mask & RADEON_RESET_IH)
3945 srbm_soft_reset |= SOFT_RESET_IH;
3947 if (reset_mask & RADEON_RESET_GRBM)
3948 srbm_soft_reset |= SOFT_RESET_GRBM;
3950 if (reset_mask & RADEON_RESET_VMC)
3951 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC soft reset only applies to discrete parts, not IGPs */
3953 if (!(rdev->flags & RADEON_IS_IGP)) {
3954 if (reset_mask & RADEON_RESET_MC)
3955 srbm_soft_reset |= SOFT_RESET_MC;
/* assert then de-assert GRBM reset; reads flush the posted writes */
3958 if (grbm_soft_reset) {
3959 tmp = RREG32(GRBM_SOFT_RESET);
3960 tmp |= grbm_soft_reset;
3961 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3962 WREG32(GRBM_SOFT_RESET, tmp);
3963 tmp = RREG32(GRBM_SOFT_RESET);
3967 tmp &= ~grbm_soft_reset;
3968 WREG32(GRBM_SOFT_RESET, tmp);
3969 tmp = RREG32(GRBM_SOFT_RESET);
/* same assert/de-assert dance for the SRBM-side resets */
3972 if (srbm_soft_reset) {
3973 tmp = RREG32(SRBM_SOFT_RESET);
3974 tmp |= srbm_soft_reset;
3975 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3976 WREG32(SRBM_SOFT_RESET, tmp);
3977 tmp = RREG32(SRBM_SOFT_RESET);
3981 tmp &= ~srbm_soft_reset;
3982 WREG32(SRBM_SOFT_RESET, tmp);
3983 tmp = RREG32(SRBM_SOFT_RESET);
3986 /* Wait a little for things to settle down */
3989 evergreen_mc_resume(rdev, &save);
3992 cik_print_gpu_status_regs(rdev);
3996 * cik_asic_reset - soft reset GPU
3998 * @rdev: radeon_device pointer
4000 * Look up which blocks are hung and attempt
4002 * Returns 0 for success.
/* ASIC reset entry point: determine hung blocks, mark the engine hung in the
 * BIOS scratch regs, soft reset, then re-check and clear the hung flag. */
4004 int cik_asic_reset(struct radeon_device *rdev)
4008 reset_mask = cik_gpu_check_soft_reset(rdev);
4011 r600_set_bios_scratch_engine_hung(rdev, true);
4013 cik_gpu_soft_reset(rdev, reset_mask);
/* re-read the status to see whether the reset actually cleared the hang */
4015 reset_mask = cik_gpu_check_soft_reset(rdev);
4018 r600_set_bios_scratch_engine_hung(rdev, false);
4024 * cik_gfx_is_lockup - check if the 3D engine is locked up
4026 * @rdev: radeon_device pointer
4027 * @ring: radeon_ring structure holding ring information
4029 * Check if the 3D engine is locked up (CIK).
4030 * Returns true if the engine is locked, false if not.
/* Lockup check for the 3D/compute CP rings: if no GFX/COMPUTE/CP block is
 * flagged busy, refresh the lockup tracker; otherwise force activity and test. */
4032 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4034 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4036 if (!(reset_mask & (RADEON_RESET_GFX |
4037 RADEON_RESET_COMPUTE |
4038 RADEON_RESET_CP))) {
4039 radeon_ring_lockup_update(ring);
4042 /* force CP activities */
4043 radeon_ring_force_activity(rdev, ring);
4044 return radeon_ring_test_lockup(rdev, ring);
4048 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4050 * @rdev: radeon_device pointer
4051 * @ring: radeon_ring structure holding ring information
4053 * Check if the async DMA engine is locked up (CIK).
4054 * Returns true if the engine appears to be locked up, false if not.
/* Lockup check for the sDMA rings; picks the DMA or DMA1 reset bit based on
 * which ring is being queried, then mirrors cik_gfx_is_lockup()'s logic. */
4056 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4058 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4061 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4062 mask = RADEON_RESET_DMA;
4064 mask = RADEON_RESET_DMA1;
4066 if (!(reset_mask & mask)) {
4067 radeon_ring_lockup_update(ring);
4070 /* force ring activities */
4071 radeon_ring_force_activity(rdev, ring);
4072 return radeon_ring_test_lockup(rdev, ring);
4077 * cik_mc_program - program the GPU memory controller
4079 * @rdev: radeon_device pointer
4081 * Set the location of vram, gart, and AGP in the GPU's
4082 * physical address space (CIK).
/* Program the MC apertures (VRAM location, system aperture, HDP nonsurface,
 * AGP disabled) with the MC stopped, then disable the VGA renderer. */
4084 static void cik_mc_program(struct radeon_device *rdev)
4086 struct evergreen_mc_save save;
4090 /* Initialize HDP */
4091 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4092 WREG32((0x2c14 + j), 0x00000000);
4093 WREG32((0x2c18 + j), 0x00000000);
4094 WREG32((0x2c1c + j), 0x00000000);
4095 WREG32((0x2c20 + j), 0x00000000);
4096 WREG32((0x2c24 + j), 0x00000000);
4098 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* MC must be idle before reprogramming the apertures */
4100 evergreen_mc_stop(rdev, &save);
4101 if (radeon_mc_wait_for_idle(rdev)) {
4102 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4104 /* Lockout access through VGA aperture*/
4105 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4106 /* Update configuration */
/* system aperture covers [vram_start, vram_end] in 4K pages */
4107 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4108 rdev->mc.vram_start >> 12);
4109 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4110 rdev->mc.vram_end >> 12);
/* out-of-aperture accesses are redirected to the VRAM scratch page */
4111 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4112 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs start/end in 16MB units: top half = end, bottom = start */
4113 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4114 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4115 WREG32(MC_VM_FB_LOCATION, tmp);
4116 /* XXX double check these! */
4117 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4118 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4119 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP disabled: TOP < BOT yields an empty AGP aperture */
4120 WREG32(MC_VM_AGP_BASE, 0);
4121 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4122 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4123 if (radeon_mc_wait_for_idle(rdev)) {
4124 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4126 evergreen_mc_resume(rdev, &save);
4127 /* we need to own VRAM, so turn off the VGA renderer here
4128 * to stop it overwriting our objects */
4129 rv515_vga_render_disable(rdev);
4133 * cik_mc_init - initialize the memory controller driver params
4135 * @rdev: radeon_device pointer
4137 * Look up the amount of vram, vram width, and decide how to place
4138 * vram and gart within the GPU's physical address space (CIK).
4139 * Returns 0 for success.
/* Discover VRAM parameters (width from channel size x channel count, sizes
 * from CONFIG_MEMSIZE) and place VRAM/GTT in the GPU address space. */
4141 static int cik_mc_init(struct radeon_device *rdev)
4144 int chansize, numchan;
4146 /* Get VRAM informations */
4147 rdev->mc.vram_is_ddr = true;
4148 tmp = RREG32(MC_ARB_RAMCFG);
4149 if (tmp & CHANSIZE_MASK) {
/* channel count is encoded in MC_SHARED_CHMAP */
4154 tmp = RREG32(MC_SHARED_CHMAP);
4155 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4185 rdev->mc.vram_width = numchan * chansize;
4186 /* Could aper size report 0 ? */
4187 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4188 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4189 /* size in MB on si */
4190 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4191 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4192 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI VRAM/GTT placement logic */
4193 si_vram_gtt_location(rdev, &rdev->mc);
4194 radeon_update_bandwidth_info(rdev);
4201 * VMID 0 is the physical GPU addresses as used by the kernel.
4202 * VMIDs 1-15 are used for userspace clients and are handled
4203 * by the radeon vm/hsa code.
4206 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4208 * @rdev: radeon_device pointer
4210 * Flush the TLB for the VMID 0 page table (CIK).
/* Flush the HDP cache and invalidate the VMID 0 TLB entry. */
4212 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4214 /* flush hdp cache */
4215 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4217 /* bits 0-15 are the VM contexts0-15 */
4218 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4222 * cik_pcie_gart_enable - gart enable
4224 * @rdev: radeon_device pointer
4226 * This sets up the TLBs, programs the page tables for VMID0,
4227 * sets up the hw for VMIDs 1-15 which are allocated on
4228 * demand, and sets up the global locations for the LDS, GDS,
4229 * and GPUVM for FSA64 clients (CIK).
4230 * Returns 0 for success, errors for failure.
/* Bring up the GART/VM hardware: TLB and L2 cache control, VMID 0 (kernel)
 * page table, VMIDs 1-15 for clients, TC policies, and per-VMID SH_MEM setup. */
4232 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4236 if (rdev->gart.robj == NULL) {
4237 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4240 r = radeon_gart_table_vram_pin(rdev);
4243 radeon_gart_restore(rdev);
4244 /* Setup TLB control */
4245 WREG32(MC_VM_MX_L1_TLB_CNTL,
4248 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4249 ENABLE_ADVANCED_DRIVER_MODEL |
4250 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4251 /* Setup L2 cache */
4252 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4253 ENABLE_L2_FRAGMENT_PROCESSING |
4254 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4255 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4256 EFFECTIVE_L2_QUEUE_SIZE(7) |
4257 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4258 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4259 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4260 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4261 /* setup context0 */
/* context0 (VMID 0) maps the GTT range with a flat (depth 0) page table */
4262 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4263 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4264 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* faults in context0 are redirected to the dummy page */
4265 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4266 (u32)(rdev->dummy_page.addr >> 12));
4267 WREG32(VM_CONTEXT0_CNTL2, 0);
4268 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4269 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4275 /* empty context1-15 */
4276 /* FIXME start with 4G, once using 2 level pt switch to full
4279 /* set vm size, must be a multiple of 4 */
4280 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4281 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* VMIDs 1-7 use the CONTEXT0 register bank, 8-15 the CONTEXT8 bank */
4282 for (i = 1; i < 16; i++) {
4284 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4285 rdev->gart.table_addr >> 12);
4287 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4288 rdev->gart.table_addr >> 12);
4291 /* enable context1-15 */
4292 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4293 (u32)(rdev->dummy_page.addr >> 12));
4294 WREG32(VM_CONTEXT1_CNTL2, 4);
/* depth 1 = two-level page tables; enable interrupts + defaults on all faults */
4295 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4296 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4300 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4301 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4302 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4303 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4304 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4305 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4306 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4307 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4309 /* TC cache setup ??? */
4310 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4311 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4312 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4314 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4315 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4316 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4317 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4318 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4320 WREG32(TC_CFG_L1_VOLATILE, 0);
4321 WREG32(TC_CFG_L2_VOLATILE, 0);
/* Kaveri-specific CHUB tweak */
4323 if (rdev->family == CHIP_KAVERI) {
4324 u32 tmp = RREG32(CHUB_CONTROL);
4326 WREG32(CHUB_CONTROL, tmp);
4329 /* XXX SH_MEM regs */
4330 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* srbm_mutex serializes the per-VMID SRBM register bank selection */
4331 mutex_lock(&rdev->srbm_mutex);
4332 for (i = 0; i < 16; i++) {
4333 cik_srbm_select(rdev, 0, 0, 0, i);
4334 /* CP and shaders */
4335 WREG32(SH_MEM_CONFIG, 0);
4336 WREG32(SH_MEM_APE1_BASE, 1);
4337 WREG32(SH_MEM_APE1_LIMIT, 0);
4338 WREG32(SH_MEM_BASES, 0);
4340 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4341 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4342 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4343 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4344 /* XXX SDMA RLC - todo */
/* restore default SRBM selection (VMID 0) */
4346 cik_srbm_select(rdev, 0, 0, 0, 0);
4347 mutex_unlock(&rdev->srbm_mutex);
4349 cik_pcie_gart_tlb_flush(rdev);
4350 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4351 (unsigned)(rdev->mc.gtt_size >> 20),
4352 (unsigned long long)rdev->gart.table_addr);
4353 rdev->gart.ready = true;
4358 * cik_pcie_gart_disable - gart disable
4360 * @rdev: radeon_device pointer
4362 * This disables all VM page table (CIK).
/* Disable all VM contexts and the L1 TLB, reconfigure the L2 cache with
 * caching off, then unpin the GART table from VRAM. */
4364 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4366 /* Disable all tables */
4367 WREG32(VM_CONTEXT0_CNTL, 0);
4368 WREG32(VM_CONTEXT1_CNTL, 0);
4369 /* Setup TLB control */
4370 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4371 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4372 /* Setup L2 cache */
4374 ENABLE_L2_FRAGMENT_PROCESSING |
4375 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4376 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4377 EFFECTIVE_L2_QUEUE_SIZE(7) |
4378 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4379 WREG32(VM_L2_CNTL2, 0);
4380 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4381 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4382 radeon_gart_table_vram_unpin(rdev);
4386 * cik_pcie_gart_fini - vm fini callback
4388 * @rdev: radeon_device pointer
4390 * Tears down the driver GART/VM setup (CIK).
/* Full GART teardown: disable the hardware, free the table, free GART state. */
4392 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4394 cik_pcie_gart_disable(rdev);
4395 radeon_gart_table_vram_free(rdev);
4396 radeon_gart_fini(rdev);
4401 * cik_ib_parse - vm ib_parse callback
4403 * @rdev: radeon_device pointer
4404 * @ib: indirect buffer pointer
4406 * CIK uses hw IB checking so this is a nop (CIK).
4408 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4415 * VMID 0 is the physical GPU addresses as used by the kernel.
4416 * VMIDs 1-15 are used for userspace clients and are handled
4417 * by the radeon vm/hsa code.
4420 * cik_vm_init - cik vm init callback
4422 * @rdev: radeon_device pointer
4424 * Inits cik specific vm parameters (number of VMs, base of vram for
4425 * VMIDs 1-15) (CIK).
4426 * Returns 0 for success.
/* Initialize CIK VM parameters: 16 VMIDs; IGPs read the VRAM base offset from
 * MC_VM_FB_OFFSET, discrete parts use 0. */
4428 int cik_vm_init(struct radeon_device *rdev)
4431 rdev->vm_manager.nvm = 16;
4432 /* base offset of vram pages */
4433 if (rdev->flags & RADEON_IS_IGP) {
4434 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4436 rdev->vm_manager.vram_base_offset = tmp;
4438 rdev->vm_manager.vram_base_offset = 0;
4444 * cik_vm_fini - cik vm fini callback
4446 * @rdev: radeon_device pointer
4448 * Tear down any asic specific VM setup (CIK).
4450 void cik_vm_fini(struct radeon_device *rdev)
4455 * cik_vm_decode_fault - print human readable fault info
4457 * @rdev: radeon_device pointer
4458 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4459 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4461 * Print human readable fault information (CIK).
/* Decode a VM protection fault: extract client id, VMID, and protection bits
 * from @status and print a human-readable log line. */
4463 static void cik_vm_decode_fault(struct radeon_device *rdev,
4464 u32 status, u32 addr, u32 mc_client)
4466 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4467 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4468 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* mc_client is a packed 4-char ASCII tag; reinterpret it as a string */
4469 char *block = (char *)&mc_client;
4471 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4472 protections, vmid, addr,
4473 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4478 * cik_vm_flush - cik vm flush using the CP
4480 * @rdev: radeon_device pointer
4482 * Update the page table base and flush the VM TLB
4483 * using the CP (CIK).
/* Emit CP packets that update the VM's page-directory base, reset SH_MEM_*
 * registers for the VMID, flush the HDP cache, and invalidate the VM TLB. */
4485 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4487 struct radeon_ring *ring = &rdev->ring[ridx];
/* write the new page-directory base for this VMID */
4492 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4493 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4494 WRITE_DATA_DST_SEL(0)));
/* VMIDs 0-7 live in the CONTEXT0 bank, 8-15 in the CONTEXT8 bank */
4496 radeon_ring_write(ring,
4497 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4499 radeon_ring_write(ring,
4500 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4502 radeon_ring_write(ring, 0);
4503 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4505 /* update SH_MEM_* regs */
/* select this VMID's SRBM register bank */
4506 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4507 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4508 WRITE_DATA_DST_SEL(0)));
4509 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4510 radeon_ring_write(ring, 0);
4511 radeon_ring_write(ring, VMID(vm->id));
/* write four consecutive SH_MEM registers in one packet */
4513 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4514 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4515 WRITE_DATA_DST_SEL(0)));
4516 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4517 radeon_ring_write(ring, 0);
4519 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4520 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4521 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4522 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* restore SRBM bank selection to VMID 0 */
4524 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4525 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4526 WRITE_DATA_DST_SEL(0)));
4527 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4528 radeon_ring_write(ring, 0);
4529 radeon_ring_write(ring, VMID(0));
4532 /* We should be using the WAIT_REG_MEM packet here like in
4533 * cik_fence_ring_emit(), but it causes the CP to hang in this
/* flush the HDP cache by writing the coherency flush register */
4536 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4537 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4538 WRITE_DATA_DST_SEL(0)));
4539 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4540 radeon_ring_write(ring, 0);
4541 radeon_ring_write(ring, 0);
4543 /* bits 0-15 are the VM contexts0-15 */
4544 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4545 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4546 WRITE_DATA_DST_SEL(0)));
4547 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4548 radeon_ring_write(ring, 0);
4549 radeon_ring_write(ring, 1 << vm->id);
4551 /* compute doesn't have PFP */
4552 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4553 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4554 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4555 radeon_ring_write(ring, 0x0);
/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
	/* translate the generic radeon VM flags into hw PTE bits */
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: WRITE_DATA header (2 dwords) + 2 dwords per entry */
		ndw = 2 + count * 2;

		ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
		ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					    WRITE_DATA_DST_SEL(1));
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		for (; ndw > 2; ndw -= 2, --count, pe += 8) {
			if (flags & RADEON_VM_PAGE_SYSTEM) {
				/* system pages: resolve through the GART, keep page-aligned bits */
				value = radeon_vm_map_gart(rdev, addr);
				value &= 0xFFFFFFFFFFFFF000ULL;
			} else if (flags & RADEON_VM_PAGE_VALID) {
			value |= r600_flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
	if (flags & RADEON_VM_PAGE_SYSTEM) {
			/* for non-physically contiguous pages (system) */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = ndw;
			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
		if (flags & RADEON_VM_PAGE_VALID)
			/* for physically contiguous pages (vram): let the sDMA
			 * engine generate the PTEs itself */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
			ib->ptr[ib->length_dw++] = pe; /* dst addr */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = r600_flags; /* mask */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = value; /* value */
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
			ib->ptr[ib->length_dw++] = incr; /* increment size */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
		/* pad the IB to a multiple of 8 dwords with sDMA NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on (sDMA 0 or 1)
 * @vm: VM whose page table base is programmed and whose TLB is flushed
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
	struct radeon_ring *ring = &rdev->ring[ridx];
	/* POLL_REG_MEM with EXTRA_OP(1)/FUNC(3): poll until register == reference */
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	/* each sDMA engine has its own HDP-flush-done bit */
	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
		ref_and_mask = SDMA1; /* else branch: second sDMA engine */
	/* contexts 0-7 and 8-15 use two separate register banks */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the VMID's register bank via SRBM_GFX_CNTL first */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	/* restore SRBM to VMID 0 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP and wait for our engine's flush-done bit */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB: bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
4740 * The RLC is a multi-purpose microengine that handles a
4741 * variety of functions, the most important of which is
4742 * the interrupt controller.
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
	/* mask off the gfx context interrupts while the RLC is halted */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	/* dummy read-backs to flush the write before proceeding */
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);

	/* clear the low two (clock-gating enable) bits */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	/* halt the RLC */
	WREG32(RLC_CNTL, 0);

	/* wait for the CU serdes masters on every SE/SH to go idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to go idle */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
	/* unhalt the RLC */
	WREG32(RLC_CNTL, RLC_ENABLE);

	/* re-enable the gfx context interrupts that cik_rlc_stop() masked off */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
	u32 clear_state_info[3];
	const __be32 *fw_data;

	/* ucode size depends on the asic family */
	switch (rdev->family) {
	size = BONAIRE_RLC_UCODE_SIZE;
	size = KV_RLC_UCODE_SIZE;
	size = KB_RLC_UCODE_SIZE;

	/* soft reset the RLC; read-backs flush the writes, delay between them */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
	RREG32(GRBM_SOFT_RESET);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balancing params with all SEs/SHs selected */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* load the RLC ucode; the firmware blob stores big-endian words */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	/* save/restore list not wired up yet, so hand the RLC null pointers */
	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
	clear_state_info[2] = 0;//cik_default_size;
	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
	for (i = 0; i < 3; i++)
		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
	WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);
4883 * Starting with r6xx, interrupts are handled via a ring buffer.
4884 * Ring buffers are areas of GPU accessible memory that the GPU
4885 * writes interrupt vectors into and the host reads vectors out of.
4886 * There is a rptr (read pointer) that determines where the
4887 * host is currently reading, and a wptr (write pointer)
4888 * which determines where the GPU has written. When the
4889 * pointers are equal, the ring is idle. When the GPU
4890 * writes vectors to the ring buffer, it increments the
4891 * wptr. When there is an interrupt, the host then starts
4892 * fetching commands and processing them until the pointers are
4893 * equal again at which point it updates the rptr.
/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* record the state so cik_irq_set() knows sources may be enabled */
	rdev->ih.enabled = true;
/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
	/* gfx ring: keep only the context busy/empty enables */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear everything but preserve the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate the IH ring */
	ret = r600_ih_ring_alloc(rdev);
	/* disable irqs while we reprogram the hw */
	cik_disable_interrupts(rdev);
	/* init rlc; on failure, tear the IH ring back down */
	ret = cik_rlc_resume(rdev);
	r600_ih_ring_fini(rdev);
	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);
5081 * cik_irq_set - enable/disable interrupt sources
5083 * @rdev: radeon_device pointer
5085 * Enable interrupt sources on the GPU (vblanks, hpd,
5087 * Returns 0 for success, errors for failure.
5089 int cik_irq_set(struct radeon_device *rdev)
5091 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5092 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5093 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5094 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5095 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5096 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5097 u32 grbm_int_cntl = 0;
5098 u32 dma_cntl, dma_cntl1;
5100 if (!rdev->irq.installed) {
5101 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5104 /* don't enable anything if the ih is disabled */
5105 if (!rdev->ih.enabled) {
5106 cik_disable_interrupts(rdev);
5107 /* force the active interrupt state to all disabled */
5108 cik_disable_interrupt_state(rdev);
5112 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5114 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5115 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5116 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5117 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5119 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5120 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5122 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5126 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5127 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5128 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5129 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5131 /* enable CP interrupts on all rings */
5132 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5133 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5134 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5136 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5137 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5138 DRM_DEBUG("si_irq_set: sw int cp1\n");
5139 if (ring->me == 1) {
5140 switch (ring->pipe) {
5142 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5145 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5148 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5151 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5154 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5157 } else if (ring->me == 2) {
5158 switch (ring->pipe) {
5160 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5163 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5166 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5169 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5172 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5176 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5179 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5180 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5181 DRM_DEBUG("si_irq_set: sw int cp2\n");
5182 if (ring->me == 1) {
5183 switch (ring->pipe) {
5185 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5188 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5191 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5194 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5197 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5200 } else if (ring->me == 2) {
5201 switch (ring->pipe) {
5203 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5206 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5209 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5212 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5215 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5219 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5223 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5224 DRM_DEBUG("cik_irq_set: sw int dma\n");
5225 dma_cntl |= TRAP_ENABLE;
5228 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5229 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5230 dma_cntl1 |= TRAP_ENABLE;
5233 if (rdev->irq.crtc_vblank_int[0] ||
5234 atomic_read(&rdev->irq.pflip[0])) {
5235 DRM_DEBUG("cik_irq_set: vblank 0\n");
5236 crtc1 |= VBLANK_INTERRUPT_MASK;
5238 if (rdev->irq.crtc_vblank_int[1] ||
5239 atomic_read(&rdev->irq.pflip[1])) {
5240 DRM_DEBUG("cik_irq_set: vblank 1\n");
5241 crtc2 |= VBLANK_INTERRUPT_MASK;
5243 if (rdev->irq.crtc_vblank_int[2] ||
5244 atomic_read(&rdev->irq.pflip[2])) {
5245 DRM_DEBUG("cik_irq_set: vblank 2\n");
5246 crtc3 |= VBLANK_INTERRUPT_MASK;
5248 if (rdev->irq.crtc_vblank_int[3] ||
5249 atomic_read(&rdev->irq.pflip[3])) {
5250 DRM_DEBUG("cik_irq_set: vblank 3\n");
5251 crtc4 |= VBLANK_INTERRUPT_MASK;
5253 if (rdev->irq.crtc_vblank_int[4] ||
5254 atomic_read(&rdev->irq.pflip[4])) {
5255 DRM_DEBUG("cik_irq_set: vblank 4\n");
5256 crtc5 |= VBLANK_INTERRUPT_MASK;
5258 if (rdev->irq.crtc_vblank_int[5] ||
5259 atomic_read(&rdev->irq.pflip[5])) {
5260 DRM_DEBUG("cik_irq_set: vblank 5\n");
5261 crtc6 |= VBLANK_INTERRUPT_MASK;
5263 if (rdev->irq.hpd[0]) {
5264 DRM_DEBUG("cik_irq_set: hpd 1\n");
5265 hpd1 |= DC_HPDx_INT_EN;
5267 if (rdev->irq.hpd[1]) {
5268 DRM_DEBUG("cik_irq_set: hpd 2\n");
5269 hpd2 |= DC_HPDx_INT_EN;
5271 if (rdev->irq.hpd[2]) {
5272 DRM_DEBUG("cik_irq_set: hpd 3\n");
5273 hpd3 |= DC_HPDx_INT_EN;
5275 if (rdev->irq.hpd[3]) {
5276 DRM_DEBUG("cik_irq_set: hpd 4\n");
5277 hpd4 |= DC_HPDx_INT_EN;
5279 if (rdev->irq.hpd[4]) {
5280 DRM_DEBUG("cik_irq_set: hpd 5\n");
5281 hpd5 |= DC_HPDx_INT_EN;
5283 if (rdev->irq.hpd[5]) {
5284 DRM_DEBUG("cik_irq_set: hpd 6\n");
5285 hpd6 |= DC_HPDx_INT_EN;
5288 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5290 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5291 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5293 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5294 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5295 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5296 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5297 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5298 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5299 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5300 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5302 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5304 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5305 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5306 if (rdev->num_crtc >= 4) {
5307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5308 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5310 if (rdev->num_crtc >= 6) {
5311 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5312 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5315 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5316 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5317 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5318 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5319 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5320 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5326 * cik_irq_ack - ack interrupt sources
5328 * @rdev: radeon_device pointer
5330 * Ack interrupt sources on the GPU (vblanks, hpd,
5331 * etc.) (CIK). Certain interrupts sources are sw
5332 * generated and do not require an explicit ack.
5334 static inline void cik_irq_ack(struct radeon_device *rdev)
5338 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5339 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5340 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5341 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5342 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5343 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5344 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5346 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5348 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5350 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5352 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5355 if (rdev->num_crtc >= 4) {
5356 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5358 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5360 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5361 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5362 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5363 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5366 if (rdev->num_crtc >= 6) {
5367 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5369 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5372 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5373 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5374 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5377 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5378 tmp = RREG32(DC_HPD1_INT_CONTROL);
5379 tmp |= DC_HPDx_INT_ACK;
5380 WREG32(DC_HPD1_INT_CONTROL, tmp);
5382 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5383 tmp = RREG32(DC_HPD2_INT_CONTROL);
5384 tmp |= DC_HPDx_INT_ACK;
5385 WREG32(DC_HPD2_INT_CONTROL, tmp);
5387 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5388 tmp = RREG32(DC_HPD3_INT_CONTROL);
5389 tmp |= DC_HPDx_INT_ACK;
5390 WREG32(DC_HPD3_INT_CONTROL, tmp);
5392 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5393 tmp = RREG32(DC_HPD4_INT_CONTROL);
5394 tmp |= DC_HPDx_INT_ACK;
5395 WREG32(DC_HPD4_INT_CONTROL, tmp);
5397 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5398 tmp = RREG32(DC_HPD5_INT_CONTROL);
5399 tmp |= DC_HPDx_INT_ACK;
5400 WREG32(DC_HPD5_INT_CONTROL, tmp);
5402 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5403 tmp = RREG32(DC_HPD5_INT_CONTROL);
5404 tmp |= DC_HPDx_INT_ACK;
5405 WREG32(DC_HPD6_INT_CONTROL, tmp);
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	cik_disable_interrupt_state(rdev);
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
	cik_irq_disable(rdev);
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
5455 * cik_get_ih_wptr - get the IH ring buffer wptr
5457 * @rdev: radeon_device pointer
5459 * Get the IH ring buffer wptr from either the register
5460 * or the writeback memory buffer (CIK). Also check for
5461 * ring buffer overflow and deal with it.
5462 * Used by cik_irq_process().
5463 * Returns the value of the wptr.
5465 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5469 if (rdev->wb.enabled)
5470 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5472 wptr = RREG32(IH_RB_WPTR);
5474 if (wptr & RB_OVERFLOW) {
5475 /* When a ring buffer overflow happen start parsing interrupt
5476 * from the last not overwritten vector (wptr + 16). Hopefully
5477 * this should allow us to catchup.
5479 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5480 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5481 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5482 tmp = RREG32(IH_RB_CNTL);
5483 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5484 WREG32(IH_RB_CNTL, tmp);
5486 return (wptr & rdev->ih.ptr_mask);
5490 * Each IV ring entry is 128 bits:
5491 * [7:0] - interrupt source id
5493 * [59:32] - interrupt source data
5494 * [63:60] - reserved
5497 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5498 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5499 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5500 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5501 * PIPE_ID - ME0 0=3D
5502 * - ME1&2 compute dispatcher (4 pipes each)
5504 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5505 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5506 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5509 * [127:96] - reserved
5512 * cik_irq_process - interrupt handler
5514 * @rdev: radeon_device pointer
 * Interrupt handler (CIK). Walk the IH ring,
5517 * ack interrupts and schedule work to handle
5519 * Returns irq process return code.
5521 int cik_irq_process(struct radeon_device *rdev)
5523 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5524 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5527 u32 src_id, src_data, ring_id;
5528 u8 me_id, pipe_id, queue_id;
5530 bool queue_hotplug = false;
5531 bool queue_reset = false;
5532 u32 addr, status, mc_client;
5534 if (!rdev->ih.enabled || rdev->shutdown)
5537 wptr = cik_get_ih_wptr(rdev);
5540 /* is somebody else already processing irqs? */
5541 if (atomic_xchg(&rdev->ih.lock, 1))
5544 rptr = rdev->ih.rptr;
5545 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5547 /* Order reading of wptr vs. reading of IH ring data */
5550 /* display interrupts */
5553 while (rptr != wptr) {
5554 /* wptr/rptr are in bytes! */
5555 ring_index = rptr / 4;
5556 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5557 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5558 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5561 case 1: /* D1 vblank/vline */
5563 case 0: /* D1 vblank */
5564 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5565 if (rdev->irq.crtc_vblank_int[0]) {
5566 drm_handle_vblank(rdev->ddev, 0);
5567 rdev->pm.vblank_sync = true;
5568 wake_up(&rdev->irq.vblank_queue);
5570 if (atomic_read(&rdev->irq.pflip[0]))
5571 radeon_crtc_handle_flip(rdev, 0);
5572 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5573 DRM_DEBUG("IH: D1 vblank\n");
5576 case 1: /* D1 vline */
5577 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5578 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5579 DRM_DEBUG("IH: D1 vline\n");
5583 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5587 case 2: /* D2 vblank/vline */
5589 case 0: /* D2 vblank */
5590 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5591 if (rdev->irq.crtc_vblank_int[1]) {
5592 drm_handle_vblank(rdev->ddev, 1);
5593 rdev->pm.vblank_sync = true;
5594 wake_up(&rdev->irq.vblank_queue);
5596 if (atomic_read(&rdev->irq.pflip[1]))
5597 radeon_crtc_handle_flip(rdev, 1);
5598 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5599 DRM_DEBUG("IH: D2 vblank\n");
5602 case 1: /* D2 vline */
5603 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5604 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5605 DRM_DEBUG("IH: D2 vline\n");
5609 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5613 case 3: /* D3 vblank/vline */
5615 case 0: /* D3 vblank */
5616 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5617 if (rdev->irq.crtc_vblank_int[2]) {
5618 drm_handle_vblank(rdev->ddev, 2);
5619 rdev->pm.vblank_sync = true;
5620 wake_up(&rdev->irq.vblank_queue);
5622 if (atomic_read(&rdev->irq.pflip[2]))
5623 radeon_crtc_handle_flip(rdev, 2);
5624 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5625 DRM_DEBUG("IH: D3 vblank\n");
5628 case 1: /* D3 vline */
5629 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5630 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5631 DRM_DEBUG("IH: D3 vline\n");
5635 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5639 case 4: /* D4 vblank/vline */
5641 case 0: /* D4 vblank */
5642 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5643 if (rdev->irq.crtc_vblank_int[3]) {
5644 drm_handle_vblank(rdev->ddev, 3);
5645 rdev->pm.vblank_sync = true;
5646 wake_up(&rdev->irq.vblank_queue);
5648 if (atomic_read(&rdev->irq.pflip[3]))
5649 radeon_crtc_handle_flip(rdev, 3);
5650 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5651 DRM_DEBUG("IH: D4 vblank\n");
5654 case 1: /* D4 vline */
5655 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5656 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5657 DRM_DEBUG("IH: D4 vline\n");
5661 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5665 case 5: /* D5 vblank/vline */
5667 case 0: /* D5 vblank */
5668 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5669 if (rdev->irq.crtc_vblank_int[4]) {
5670 drm_handle_vblank(rdev->ddev, 4);
5671 rdev->pm.vblank_sync = true;
5672 wake_up(&rdev->irq.vblank_queue);
5674 if (atomic_read(&rdev->irq.pflip[4]))
5675 radeon_crtc_handle_flip(rdev, 4);
5676 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5677 DRM_DEBUG("IH: D5 vblank\n");
5680 case 1: /* D5 vline */
5681 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5682 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5683 DRM_DEBUG("IH: D5 vline\n");
5687 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5691 case 6: /* D6 vblank/vline */
5693 case 0: /* D6 vblank */
5694 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5695 if (rdev->irq.crtc_vblank_int[5]) {
5696 drm_handle_vblank(rdev->ddev, 5);
5697 rdev->pm.vblank_sync = true;
5698 wake_up(&rdev->irq.vblank_queue);
5700 if (atomic_read(&rdev->irq.pflip[5]))
5701 radeon_crtc_handle_flip(rdev, 5);
5702 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5703 DRM_DEBUG("IH: D6 vblank\n");
5706 case 1: /* D6 vline */
5707 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5708 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5709 DRM_DEBUG("IH: D6 vline\n");
5713 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5717 case 42: /* HPD hotplug */
5720 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5721 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5722 queue_hotplug = true;
5723 DRM_DEBUG("IH: HPD1\n");
5727 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5728 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5729 queue_hotplug = true;
5730 DRM_DEBUG("IH: HPD2\n");
5734 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5735 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5736 queue_hotplug = true;
5737 DRM_DEBUG("IH: HPD3\n");
5741 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5742 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5743 queue_hotplug = true;
5744 DRM_DEBUG("IH: HPD4\n");
5748 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5749 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5750 queue_hotplug = true;
5751 DRM_DEBUG("IH: HPD5\n");
5755 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5756 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5757 queue_hotplug = true;
5758 DRM_DEBUG("IH: HPD6\n");
5762 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5768 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5769 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5770 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5771 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5772 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5774 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5776 cik_vm_decode_fault(rdev, status, addr, mc_client);
5777 /* reset addr and status */
5778 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5780 case 176: /* GFX RB CP_INT */
5781 case 177: /* GFX IB CP_INT */
5782 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5784 case 181: /* CP EOP event */
5785 DRM_DEBUG("IH: CP EOP\n");
5786 /* XXX check the bitfield order! */
5787 me_id = (ring_id & 0x60) >> 5;
5788 pipe_id = (ring_id & 0x18) >> 3;
5789 queue_id = (ring_id & 0x7) >> 0;
5792 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5796 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
5797 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5798 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
5799 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5803 case 184: /* CP Privileged reg access */
5804 DRM_ERROR("Illegal register access in command stream\n");
5805 /* XXX check the bitfield order! */
5806 me_id = (ring_id & 0x60) >> 5;
5807 pipe_id = (ring_id & 0x18) >> 3;
5808 queue_id = (ring_id & 0x7) >> 0;
5811 /* This results in a full GPU reset, but all we need to do is soft
5812 * reset the CP for gfx
5826 case 185: /* CP Privileged inst */
5827 DRM_ERROR("Illegal instruction in command stream\n");
5828 /* XXX check the bitfield order! */
5829 me_id = (ring_id & 0x60) >> 5;
5830 pipe_id = (ring_id & 0x18) >> 3;
5831 queue_id = (ring_id & 0x7) >> 0;
5834 /* This results in a full GPU reset, but all we need to do is soft
5835 * reset the CP for gfx
5849 case 224: /* SDMA trap event */
5850 /* XXX check the bitfield order! */
5851 me_id = (ring_id & 0x3) >> 0;
5852 queue_id = (ring_id & 0xc) >> 2;
5853 DRM_DEBUG("IH: SDMA trap\n");
5858 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5871 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5883 case 241: /* SDMA Privileged inst */
5884 case 247: /* SDMA Privileged inst */
5885 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5886 /* XXX check the bitfield order! */
5887 me_id = (ring_id & 0x3) >> 0;
5888 queue_id = (ring_id & 0xc) >> 2;
5922 case 233: /* GUI IDLE */
5923 DRM_DEBUG("IH: GUI idle\n");
5926 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5930 /* wptr/rptr are in bytes! */
5932 rptr &= rdev->ih.ptr_mask;
5935 schedule_work(&rdev->hotplug_work);
5937 schedule_work(&rdev->reset_work);
5938 rdev->ih.rptr = rptr;
5939 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5940 atomic_set(&rdev->ih.lock, 0);
5942 /* make sure wptr hasn't changed while processing */
5943 wptr = cik_get_ih_wptr(rdev);
5951 * startup/shutdown callbacks
5954 * cik_startup - program the asic to a functional state
5956 * @rdev: radeon_device pointer
5958 * Programs the asic to a functional state (CIK).
5959 * Called by cik_init() and cik_resume().
5960 * Returns 0 for success, error for failure.
/*
 * Bring the ASIC to a fully functional state: load microcode, program the
 * memory controller and GART, allocate RLC/write-back/MEC buffers, start
 * the fence driver for every ring, bring up interrupts, then initialize
 * the GFX, compute (CP1/CP2), SDMA and UVD rings, the IB pool and the VM
 * manager.  Shared by cik_init() and cik_resume().
 * Returns 0 on success, negative error code on failure.
 * NOTE(review): intervening error-check lines are elided in this listing.
 */
5962 static int cik_startup(struct radeon_device *rdev)
5964 struct radeon_ring *ring;
/* program the memory controller before anything touches VRAM */
5967 cik_mc_program(rdev);
/* IGP and dGPU check different firmware sets before (re)loading ucode */
5969 if (rdev->flags & RADEON_IS_IGP) {
5970 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5971 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5972 r = cik_init_microcode(rdev);
5974 DRM_ERROR("Failed to load firmware!\n");
5979 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5980 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5982 r = cik_init_microcode(rdev);
5984 DRM_ERROR("Failed to load firmware!\n");
/* MC firmware is loaded only on the dGPU path (IGPs have no MC ucode) */
5989 r = ci_mc_load_microcode(rdev);
5991 DRM_ERROR("Failed to load MC firmware!\n");
5996 r = r600_vram_scratch_init(rdev);
6000 r = cik_pcie_gart_enable(rdev);
6005 /* allocate rlc buffers */
6006 r = si_rlc_init(rdev);
6008 DRM_ERROR("Failed to init rlc BOs!\n");
6012 /* allocate wb buffer */
6013 r = radeon_wb_init(rdev);
6017 /* allocate mec buffers */
6018 r = cik_mec_init(rdev);
6020 DRM_ERROR("Failed to init MEC BOs!\n");
/* start the fence driver on each ring: GFX, two compute, two SDMA */
6024 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6026 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6030 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6032 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6036 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6038 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6042 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6044 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6048 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6050 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD is optional: on failure its ring_size is zeroed so it is skipped below */
6054 r = cik_uvd_resume(rdev);
6056 r = radeon_fence_driver_start_ring(rdev,
6057 R600_RING_TYPE_UVD_INDEX);
6059 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6062 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* interrupts: install KMS IRQ handling once, then program the IH ring */
6065 if (!rdev->irq.installed) {
6066 r = radeon_irq_kms_init(rdev);
6071 r = cik_irq_init(rdev);
6073 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6074 radeon_irq_kms_fini(rdev);
6079 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6080 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6081 CP_RB0_RPTR, CP_RB0_WPTR,
6082 0, 0xfffff, RADEON_CP_PACKET2);
6086 /* set up the compute queues */
6087 /* type-2 packets are deprecated on MEC, use type-3 instead */
6088 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6089 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6090 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6091 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF))
6094 ring->me = 1; /* first MEC */
6095 ring->pipe = 0; /* first pipe */
6096 ring->queue = 0; /* first queue */
6097 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6099 /* type-2 packets are deprecated on MEC, use type-3 instead */
6100 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6101 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6102 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6103 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6106 /* dGPU only have 1 MEC */
6107 ring->me = 1; /* first MEC */
6108 ring->pipe = 0; /* first pipe */
6109 ring->queue = 1; /* second queue */
6110 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
/* the two SDMA engines share register layout at different offsets */
6112 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6113 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6114 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6115 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6116 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6120 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6121 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6122 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6123 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6124 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6128 r = cik_cp_resume(rdev);
6132 r = cik_sdma_resume(rdev);
/* UVD ring is only initialized if cik_uvd_resume() succeeded above */
6136 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6137 if (ring->ring_size) {
6138 r = radeon_ring_init(rdev, ring, ring->ring_size,
6139 R600_WB_UVD_RPTR_OFFSET,
6140 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6141 0, 0xfffff, RADEON_CP_PACKET2);
6143 r = r600_uvd_init(rdev);
6145 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6148 r = radeon_ib_pool_init(rdev);
6150 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6154 r = radeon_vm_manager_init(rdev);
6156 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6164 * cik_resume - resume the asic to a functional state
6166 * @rdev: radeon_device pointer
6168 * Programs the asic to a functional state (CIK).
6170 * Returns 0 for success, error for failure.
/*
 * Resume the ASIC after suspend: re-run the atombios asic_init tables,
 * restore the golden register settings, then go through the common
 * cik_startup() path.  accel_working is set optimistically and cleared
 * again if startup fails.
 */
6172 int cik_resume(struct radeon_device *rdev)
/* post card */
6177 atom_asic_init(rdev->mode_info.atom_context);
6179 /* init golden registers */
6180 cik_init_golden_registers(rdev);
6182 rdev->accel_working = true;
6183 r = cik_startup(rdev);
6185 DRM_ERROR("cik startup failed on resume\n");
6186 rdev->accel_working = false;
6195 * cik_suspend - suspend the asic
6197 * @rdev: radeon_device pointer
6199 * Bring the chip into a state suitable for suspend (CIK).
6200 * Called at suspend.
6201 * Returns 0 for success.
/*
 * Bring the chip into a state suitable for suspend, tearing down in
 * roughly the reverse order of cik_startup(): VM manager, command
 * processors, SDMA engines, UVD, interrupts, write-back, and finally GART.
 * Returns 0 for success.
 */
6203 int cik_suspend(struct radeon_device *rdev)
6205 radeon_vm_manager_fini(rdev);
/* halt the command processor and both DMA engines */
6206 cik_cp_enable(rdev, false);
6207 cik_sdma_enable(rdev, false);
6208 r600_uvd_stop(rdev);
6209 radeon_uvd_suspend(rdev);
6210 cik_irq_suspend(rdev);
6211 radeon_wb_disable(rdev);
/* GART must go down last: the steps above may still reference mappings */
6212 cik_pcie_gart_disable(rdev);
6216 /* Plan is to move initialization in that function and use
6217 * helper function so that radeon_device_init pretty much
6218 * does nothing more than call asic-specific functions. This
6219 * should also allow to remove a bunch of callback function
6223 * cik_init - asic specific driver and hw init
6225 * @rdev: radeon_device pointer
6227 * Setup asic specific driver variables and program the hw
6228 * to a functional state (CIK).
6229 * Called at driver startup.
6230 * Returns 0 for success, errors for failure.
/*
 * One-time driver/hw init at load time: fetch and validate the (ATOM)BIOS,
 * post the card if needed, set up scratch/surface registers, clocks,
 * fences, the memory controller and buffer manager, size all rings, and
 * finally run cik_startup().  On startup failure, acceleration is disabled
 * but the driver still loads (modesetting keeps working).
 * Returns 0 for success, errors for failure.
 * NOTE(review): intervening error-check lines are elided in this listing.
 */
6232 int cik_init(struct radeon_device *rdev)
6234 struct radeon_ring *ring;
/* Read BIOS */
6238 if (!radeon_get_bios(rdev)) {
6239 if (ASIC_IS_AVIVO(rdev))
6242 /* Must be an ATOMBIOS */
6243 if (!rdev->is_atom_bios) {
/* NOTE(review): message still says "cayman" but this is the CIK path —
 * consider updating the string in a follow-up */
6244 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6247 r = radeon_atombios_init(rdev);
6251 /* Post card if necessary */
6252 if (!radeon_card_posted(rdev)) {
6254 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6257 DRM_INFO("GPU not posted. posting now...\n");
6258 atom_asic_init(rdev->mode_info.atom_context);
6260 /* init golden registers */
6261 cik_init_golden_registers(rdev);
6262 /* Initialize scratch registers */
6263 cik_scratch_init(rdev);
6264 /* Initialize surface registers */
6265 radeon_surface_init(rdev);
6266 /* Initialize clocks */
6267 radeon_get_clock_info(rdev->ddev);
6270 r = radeon_fence_driver_init(rdev);
6274 /* initialize memory controller */
6275 r = cik_mc_init(rdev);
6278 /* Memory manager */
6279 r = radeon_bo_init(rdev);
/* size the rings: 1MB for GFX and the two compute rings (which also get
 * doorbell pages), 256KB per SDMA ring, 4KB for UVD */
6283 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6284 ring->ring_obj = NULL;
6285 r600_ring_init(rdev, ring, 1024 * 1024);
6287 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6288 ring->ring_obj = NULL;
6289 r600_ring_init(rdev, ring, 1024 * 1024);
6290 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6294 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6295 ring->ring_obj = NULL;
6296 r600_ring_init(rdev, ring, 1024 * 1024);
6297 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6301 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6302 ring->ring_obj = NULL;
6303 r600_ring_init(rdev, ring, 256 * 1024);
6305 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6306 ring->ring_obj = NULL;
6307 r600_ring_init(rdev, ring, 256 * 1024);
6309 r = radeon_uvd_init(rdev);
6311 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6312 ring->ring_obj = NULL;
6313 r600_ring_init(rdev, ring, 4096);
6316 rdev->ih.ring_obj = NULL;
6317 r600_ih_ring_init(rdev, 64 * 1024);
6319 r = r600_pcie_gart_init(rdev);
6323 rdev->accel_working = true;
6324 r = cik_startup(rdev);
/* startup failed: unwind everything cik_startup() set up and carry on
 * without acceleration */
6326 dev_err(rdev->dev, "disabling GPU acceleration\n");
6328 cik_sdma_fini(rdev);
6332 radeon_wb_fini(rdev);
6333 radeon_ib_pool_fini(rdev);
6334 radeon_vm_manager_fini(rdev);
6335 radeon_irq_kms_fini(rdev);
6336 cik_pcie_gart_fini(rdev);
6337 rdev->accel_working = false;
6340 /* Don't start up if the MC ucode is missing.
6341 * The default clocks and voltages before the MC ucode
6342 * is loaded are not sufficient for advanced operations.
6344 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6345 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6353 * cik_fini - asic specific driver and hw fini
6355 * @rdev: radeon_device pointer
6357 * Tear down the asic specific driver variables and program the hw
6358 * to an idle state (CIK).
6359 * Called at driver unload.
/*
 * Driver unload teardown: stop the engines (CP/SDMA), release write-back,
 * VM manager, IB pool, IRQs, UVD, GART, VRAM scratch, GEM, fences, the
 * buffer manager and the atombios context.  Mirrors cik_init()/cik_startup()
 * in reverse.
 */
6361 void cik_fini(struct radeon_device *rdev)
6364 cik_sdma_fini(rdev);
6368 radeon_wb_fini(rdev);
6369 radeon_vm_manager_fini(rdev);
6370 radeon_ib_pool_fini(rdev);
6371 radeon_irq_kms_fini(rdev);
6372 r600_uvd_stop(rdev);
6373 radeon_uvd_fini(rdev);
6374 cik_pcie_gart_fini(rdev);
6375 r600_vram_scratch_fini(rdev);
6376 radeon_gem_fini(rdev);
6377 radeon_fence_driver_fini(rdev);
6378 radeon_bo_fini(rdev);
6379 radeon_atombios_fini(rdev);
6384 /* display watermark setup */
6386 * dce8_line_buffer_adjust - Set up the line buffer
6388 * @rdev: radeon_device pointer
6389 * @radeon_crtc: the selected display controller
6390 * @mode: the current display mode on the selected display
6393 * Set up the line buffer allocation for
6394 * the selected display controller (CIK).
6395 * Returns the line buffer size in pixels.
/*
 * Select and program the line buffer partitioning for one display
 * controller based on the mode's horizontal resolution, then return the
 * line buffer size (in pixels) allocated to that pipe.
 */
6397 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6398 struct radeon_crtc *radeon_crtc,
6399 struct drm_display_mode *mode)
6405 * There are 6 line buffers, one for each display controller.
6406 * There are 3 partitions per LB. Select the number of partitions
6407 * to enable based on the display width. For display widths larger
6408 * than 4096, you need to use 2 display controllers and combine
6409 * them using the stereo blender.
6411 if (radeon_crtc->base.enabled && mode) {
/* width thresholds pick how many of the 3 partitions to enable */
6412 if (mode->crtc_hdisplay < 1920)
6414 else if (mode->crtc_hdisplay < 2560)
6416 else if (mode->crtc_hdisplay < 4096)
6419 DRM_DEBUG_KMS("Mode too big for LB!\n");
6425 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6426 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6428 if (radeon_crtc->base.enabled && mode) {
6440 /* controller not enabled, so no lb used */
6445 * cik_get_number_of_dram_channels - get the number of dram channels
6447 * @rdev: radeon_device pointer
6449 * Look up the number of video ram channels (CIK).
6450 * Used for display watermark bandwidth calculations
6451 * Returns the number of dram channels
/*
 * Decode the NOOFCHAN field of MC_SHARED_CHMAP into the number of VRAM
 * channels.  Used by the display watermark bandwidth calculations.
 */
6453 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6455 u32 tmp = RREG32(MC_SHARED_CHMAP);
6457 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input parameters for the DCE8 display watermark calculations below. */
6480 struct dce8_wm_params {
6481 u32 dram_channels; /* number of dram channels */
6482 u32 yclk; /* bandwidth per dram data pin in kHz */
6483 u32 sclk; /* engine clock in kHz */
6484 u32 disp_clk; /* display clock in kHz */
6485 u32 src_width; /* viewport width */
6486 u32 active_time; /* active display time in ns */
6487 u32 blank_time; /* blank time in ns */
6488 bool interlaced; /* mode is interlaced */
6489 fixed20_12 vsc; /* vertical scale ratio */
6490 u32 num_heads; /* number of active crtcs */
6491 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6492 u32 lb_size; /* line buffer allocated to pipe */
6493 u32 vtaps; /* vertical scaler taps */
6497 * dce8_dram_bandwidth - get the dram bandwidth
6499 * @wm: watermark calculation data
6501 * Calculate the raw dram bandwidth (CIK).
6502 * Used for display watermark bandwidth calculations
6503 * Returns the dram bandwidth in MBytes/s
/*
 * Raw DRAM bandwidth in MBytes/s:
 * channels * 4 * yclk(MHz) * 0.7 efficiency, in 20.12 fixed point.
 */
6505 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6507 /* Calculate raw DRAM Bandwidth */
6508 fixed20_12 dram_efficiency; /* 0.7 */
6509 fixed20_12 yclk, dram_channels, bandwidth;
/* yclk is in kHz; divide by 1000 to get MHz */
6512 a.full = dfixed_const(1000);
6513 yclk.full = dfixed_const(wm->yclk);
6514 yclk.full = dfixed_div(yclk, a);
/* x4 per channel — presumably bytes per channel transfer; confirm */
6515 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* efficiency factor: 7/10 = 0.7 */
6516 a.full = dfixed_const(10);
6517 dram_efficiency.full = dfixed_const(7);
6518 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6519 bandwidth.full = dfixed_mul(dram_channels, yclk);
6520 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6522 return dfixed_trunc(bandwidth);
6526 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6528 * @wm: watermark calculation data
6530 * Calculate the dram bandwidth used for display (CIK).
6531 * Used for display watermark bandwidth calculations
6532 * Returns the dram bandwidth for display in MBytes/s
/*
 * DRAM bandwidth available to the display in MBytes/s: same shape as
 * dce8_dram_bandwidth() but with the worst-case 0.3 display allocation
 * instead of the 0.7 raw efficiency factor.
 */
6534 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6536 /* Calculate DRAM Bandwidth and the part allocated to display. */
6537 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6538 fixed20_12 yclk, dram_channels, bandwidth;
/* kHz -> MHz */
6541 a.full = dfixed_const(1000);
6542 yclk.full = dfixed_const(wm->yclk);
6543 yclk.full = dfixed_div(yclk, a);
6544 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* 3/10 = 0.3 */
6545 a.full = dfixed_const(10);
6546 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
6547 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6548 bandwidth.full = dfixed_mul(dram_channels, yclk);
6549 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6551 return dfixed_trunc(bandwidth);
6555 * dce8_data_return_bandwidth - get the data return bandwidth
6557 * @wm: watermark calculation data
6559 * Calculate the data return bandwidth used for display (CIK).
6560 * Used for display watermark bandwidth calculations
6561 * Returns the data return bandwidth in MBytes/s
/*
 * Display data return bandwidth in MBytes/s:
 * 32 * sclk(MHz) * 0.8 return efficiency.
 */
6563 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6565 /* Calculate the display Data return Bandwidth */
6566 fixed20_12 return_efficiency; /* 0.8 */
6567 fixed20_12 sclk, bandwidth;
/* sclk is in kHz; convert to MHz */
6570 a.full = dfixed_const(1000);
6571 sclk.full = dfixed_const(wm->sclk);
6572 sclk.full = dfixed_div(sclk, a);
/* 8/10 = 0.8 */
6573 a.full = dfixed_const(10);
6574 return_efficiency.full = dfixed_const(8);
6575 return_efficiency.full = dfixed_div(return_efficiency, a);
/* 32 — presumably bytes returned per sclk cycle; confirm */
6576 a.full = dfixed_const(32);
6577 bandwidth.full = dfixed_mul(a, sclk);
6578 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6580 return dfixed_trunc(bandwidth);
6584 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6586 * @wm: watermark calculation data
6588 * Calculate the dmif bandwidth used for display (CIK).
6589 * Used for display watermark bandwidth calculations
6590 * Returns the dmif bandwidth in MBytes/s
/*
 * DMIF request bandwidth in MBytes/s:
 * 32 * disp_clk(MHz) * 0.8 request efficiency.
 */
6592 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6594 /* Calculate the DMIF Request Bandwidth */
6595 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6596 fixed20_12 disp_clk, bandwidth;
/* disp_clk is in kHz; convert to MHz */
6599 a.full = dfixed_const(1000);
6600 disp_clk.full = dfixed_const(wm->disp_clk);
6601 disp_clk.full = dfixed_div(disp_clk, a);
6602 a.full = dfixed_const(32);
6603 b.full = dfixed_mul(a, disp_clk);
/* 8/10 = 0.8 */
6605 a.full = dfixed_const(10);
6606 disp_clk_request_efficiency.full = dfixed_const(8);
6607 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6609 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6611 return dfixed_trunc(bandwidth);
6615 * dce8_available_bandwidth - get the min available bandwidth
6617 * @wm: watermark calculation data
6619 * Calculate the min available bandwidth used for display (CIK).
6620 * Used for display watermark bandwidth calculations
6621 * Returns the min available bandwidth in MBytes/s
/*
 * Minimum of the three bandwidth limits (DRAM, data return, DMIF) —
 * the bandwidth the display can actually count on, in MBytes/s.
 */
6623 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6625 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6626 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6627 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6628 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6630 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6634 * dce8_average_bandwidth - get the average available bandwidth
6636 * @wm: watermark calculation data
6638 * Calculate the average available bandwidth used for display (CIK).
6639 * Used for display watermark bandwidth calculations
6640 * Returns the average available bandwidth in MBytes/s
/*
 * Average bandwidth the current mode consumes in MBytes/s:
 * src_width * bytes_per_pixel * vsc / line_time(us).
 */
6642 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6644 /* Calculate the display mode Average Bandwidth
6645 * DisplayMode should contain the source and destination dimensions,
6649 fixed20_12 line_time;
6650 fixed20_12 src_width;
6651 fixed20_12 bandwidth;
/* active_time/blank_time are in ns; divide by 1000 to get us */
6654 a.full = dfixed_const(1000);
6655 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6656 line_time.full = dfixed_div(line_time, a);
6657 bpp.full = dfixed_const(wm->bytes_per_pixel);
6658 src_width.full = dfixed_const(wm->src_width);
6659 bandwidth.full = dfixed_mul(src_width, bpp);
/* scale by the vertical scale ratio (upscaled sources fetch more lines) */
6660 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6661 bandwidth.full = dfixed_div(bandwidth, line_time);
6663 return dfixed_trunc(bandwidth);
6667 * dce8_latency_watermark - get the latency watermark
6669 * @wm: watermark calculation data
6671 * Calculate the latency watermark (CIK).
6672 * Used for display watermark bandwidth calculations
6673 * Returns the latency watermark in ns
/*
 * Compute the latency watermark in ns: MC latency plus the time other
 * heads spend returning data plus display-pipe latency, extended by any
 * shortfall between line fill time and active display time.
 */
6675 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6677 /* First calculate the latency in ns */
6678 u32 mc_latency = 2000; /* 2000 ns. */
6679 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* 512-byte chunk * 8, converted to ns via the MBytes/s bandwidth */
6680 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6681 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6682 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6683 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6684 (wm->num_heads * cursor_line_pair_return_time);
6685 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6686 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6687 u32 tmp, dmif_size = 12288;
6690 if (wm->num_heads == 0)
/* heavy downscale, 3+ tap scaling, or interlace needs 4 source lines
 * per destination line; otherwise 2 are enough */
6693 a.full = dfixed_const(2);
6694 b.full = dfixed_const(1);
6695 if ((wm->vsc.full > a.full) ||
6696 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6698 ((wm->vsc.full >= a.full) && wm->interlaced))
6699 max_src_lines_per_dst_line = 4;
6701 max_src_lines_per_dst_line = 2;
/* per-head share of the available bandwidth */
6703 a.full = dfixed_const(available_bandwidth);
6704 b.full = dfixed_const(wm->num_heads);
6705 a.full = dfixed_div(a, b);
/* dmif-size-limited bandwidth: dmif_size / ((mc_latency + 512) / disp_clk) */
6707 b.full = dfixed_const(mc_latency + 512);
6708 c.full = dfixed_const(wm->disp_clk);
6709 b.full = dfixed_div(b, c);
6711 c.full = dfixed_const(dmif_size);
6712 b.full = dfixed_div(c, b);
6714 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
/* line buffer fill rate is also capped by disp_clk * bytes_per_pixel */
6716 b.full = dfixed_const(1000);
6717 c.full = dfixed_const(wm->disp_clk);
6718 b.full = dfixed_div(c, b);
6719 c.full = dfixed_const(wm->bytes_per_pixel);
6720 b.full = dfixed_mul(b, c);
6722 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* time to fill the worst-case number of source lines at lb_fill_bw */
6724 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6725 b.full = dfixed_const(1000);
6726 c.full = dfixed_const(lb_fill_bw);
6727 b.full = dfixed_div(c, b);
6728 a.full = dfixed_div(a, b);
6729 line_fill_time = dfixed_trunc(a);
6731 if (line_fill_time < wm->active_time)
6734 return latency + (line_fill_time - wm->active_time);
6739 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6740 * average and available dram bandwidth
6742 * @wm: watermark calculation data
6744 * Check if the display average bandwidth fits in the display
6745 * dram bandwidth (CIK).
6746 * Used for display watermark bandwidth calculations
6747 * Returns true if the display fits, false if not.
/*
 * True if this head's average bandwidth fits in an even per-head share of
 * the display's DRAM bandwidth allocation.
 */
6749 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6751 if (dce8_average_bandwidth(wm) <=
6752 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6759 * dce8_average_bandwidth_vs_available_bandwidth - check
6760 * average and available bandwidth
6762 * @wm: watermark calculation data
6764 * Check if the display average bandwidth fits in the display
6765 * available bandwidth (CIK).
6766 * Used for display watermark bandwidth calculations
6767 * Returns true if the display fits, false if not.
/*
 * True if this head's average bandwidth fits in an even per-head share of
 * the minimum available bandwidth.
 */
6769 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6771 if (dce8_average_bandwidth(wm) <=
6772 (dce8_available_bandwidth(wm) / wm->num_heads))
6779 * dce8_check_latency_hiding - check latency hiding
6781 * @wm: watermark calculation data
6783 * Check latency hiding (CIK).
6784 * Used for display watermark bandwidth calculations
6785 * Returns true if the display fits, false if not.
/*
 * True if the line buffer can hide the computed latency watermark:
 * compares the latency against the time covered by the latency-tolerant
 * lines plus the blank interval.
 */
6787 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6789 u32 lb_partitions = wm->lb_size / wm->src_width;
6790 u32 line_time = wm->active_time + wm->blank_time;
6791 u32 latency_tolerant_lines;
/* downscaling (vsc > 1) or a line buffer eaten up by scaler taps only
 * tolerates 1 line of latency; otherwise 2 */
6795 a.full = dfixed_const(1);
6796 if (wm->vsc.full > a.full)
6797 latency_tolerant_lines = 1;
6799 if (lb_partitions <= (wm->vtaps + 1))
6800 latency_tolerant_lines = 1;
6802 latency_tolerant_lines = 2;
6805 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6807 if (dce8_latency_watermark(wm) <= latency_hiding)
6814 * dce8_program_watermarks - program display watermarks
6816 * @rdev: radeon_device pointer
6817 * @radeon_crtc: the selected display controller
6818 * @lb_size: line buffer size
6819 * @num_heads: number of display controllers in use
6821 * Calculate and program the display watermarks for the
6822 * selected display controller (CIK).
/*
 * Calculate and program the DPG latency watermarks for one display
 * controller.  Fills a dce8_wm_params from the active mode and current
 * clocks, computes two latency watermarks, optionally forces display
 * priority high if the mode does not fit the bandwidth/latency checks,
 * then writes both watermark sets via the DPG mask/latency registers.
 */
6824 static void dce8_program_watermarks(struct radeon_device *rdev,
6825 struct radeon_crtc *radeon_crtc,
6826 u32 lb_size, u32 num_heads)
6828 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6829 struct dce8_wm_params wm;
6832 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6835 if (radeon_crtc->base.enabled && num_heads && mode) {
/* mode->clock is in kHz, so this gives the pixel period in ns */
6836 pixel_period = 1000000 / (u32)mode->clock;
6837 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
/* current_mclk/current_sclk are in 10 kHz units; x10 gives kHz */
6839 wm.yclk = rdev->pm.current_mclk * 10;
6840 wm.sclk = rdev->pm.current_sclk * 10;
6841 wm.disp_clk = mode->clock;
6842 wm.src_width = mode->crtc_hdisplay;
6843 wm.active_time = mode->crtc_hdisplay * pixel_period;
6844 wm.blank_time = line_time - wm.active_time;
6845 wm.interlaced = false;
6846 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6847 wm.interlaced = true;
6848 wm.vsc = radeon_crtc->vsc;
6850 if (radeon_crtc->rmx_type != RMX_OFF)
6852 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6853 wm.lb_size = lb_size;
6854 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6855 wm.num_heads = num_heads;
6857 /* set for high clocks */
6858 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6859 /* set for low clocks */
6860 /* wm.yclk = low clk; wm.sclk = low clk */
/* NOTE(review): wm is not actually updated with low-clock values between
 * the two calls, so watermark b equals watermark a here — matches the
 * placeholder comment above; confirm before relying on it */
6861 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6863 /* possibly force display priority to high */
6864 /* should really do this at mode validation time... */
6865 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6866 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6867 !dce8_check_latency_hiding(&wm) ||
6868 (rdev->disp_priority == 2)) {
6869 DRM_DEBUG_KMS("force priority to high\n");
/* select watermark set 1 in the mask register, then program set A */
6874 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6876 tmp &= ~LATENCY_WATERMARK_MASK(3);
6877 tmp |= LATENCY_WATERMARK_MASK(1);
6878 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6879 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6880 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6881 LATENCY_HIGH_WATERMARK(line_time)));
/* select watermark set 2, then program set B */
6883 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6884 tmp &= ~LATENCY_WATERMARK_MASK(3);
6885 tmp |= LATENCY_WATERMARK_MASK(2);
6886 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6887 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6888 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6889 LATENCY_HIGH_WATERMARK(line_time)));
6890 /* restore original selection */
6891 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6895 * dce8_bandwidth_update - program display watermarks
6897 * @rdev: radeon_device pointer
6899 * Calculate and program the display watermarks and line
6900 * buffer allocation (CIK).
/*
 * Recompute and program display watermarks and line buffer allocation
 * for every CRTC: first count enabled heads, then adjust each CRTC's
 * line buffer and program its watermarks.
 */
6902 void dce8_bandwidth_update(struct radeon_device *rdev)
6904 struct drm_display_mode *mode = NULL;
6905 u32 num_heads = 0, lb_size;
6908 radeon_update_display_priority(rdev);
/* first pass: count active heads (needed by the watermark math) */
6910 for (i = 0; i < rdev->num_crtc; i++) {
6911 if (rdev->mode_info.crtcs[i]->base.enabled)
/* second pass: per-CRTC line buffer + watermark programming */
6914 for (i = 0; i < rdev->num_crtc; i++) {
6915 mode = &rdev->mode_info.crtcs[i]->base.mode;
6916 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6917 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6922 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6924 * @rdev: radeon_device pointer
6926 * Fetches a GPU clock counter snapshot (CIK).
6927 * Returns the 64 bit clock counter snapshot.
/*
 * Return a 64-bit snapshot of the GPU clock counter.  The RLC latches the
 * counter on the capture write; the two 32-bit halves are then read and
 * combined.  The mutex keeps the latch/read sequence atomic between callers.
 */
6929 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6933 mutex_lock(&rdev->gpu_clock_mutex);
/* writing 1 latches the current count into the LSB/MSB registers */
6934 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6935 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6936 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6937 mutex_unlock(&rdev->gpu_clock_mutex);
6941 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6942 u32 cntl_reg, u32 status_reg)
6945 struct atom_clock_dividers dividers;
6948 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6949 clock, false, ÷rs);
6953 tmp = RREG32_SMC(cntl_reg);
6954 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6955 tmp |= dividers.post_divider;
6956 WREG32_SMC(cntl_reg, tmp);
6958 for (i = 0; i < 100; i++) {
6959 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/*
 * Program both UVD clocks (VCLK then DCLK) via cik_set_uvd_clock().
 * Returns 0 on success or the first failing sub-call's error code.
 */
6969 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6973 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6977 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6981 int cik_uvd_resume(struct radeon_device *rdev)
6987 r = radeon_uvd_resume(rdev);
6991 /* programm the VCPU memory controller bits 0-27 */
6992 addr = rdev->uvd.gpu_addr >> 3;
6993 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6994 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6995 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6998 size = RADEON_UVD_STACK_SIZE >> 3;
6999 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7000 WREG32(UVD_VCPU_CACHE_SIZE1, size);
7003 size = RADEON_UVD_HEAP_SIZE >> 3;
7004 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7005 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7008 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7009 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7012 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7013 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));