drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
45
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58
59 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
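
/*
 * Field-composition helpers for the GB_TILE_MODEn / GB_MACROTILE_MODEn
 * registers.  As an illustration of how a tiling-table entry is put together
 * from them (a hedged sketch; the real tables are built further down in this
 * file with constants from gca/gfx_8_0_enum.h):
 *
 *   modearray[n] = ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *                  PIPE_CONFIG(ADDR_SURF_P2) |
 *                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING);
 */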
68
69 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
70 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
71 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
73 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
74 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
75
76 /* BPM SERDES CMD */
77 #define SET_BPM_SERDES_CMD    1
78 #define CLE_BPM_SERDES_CMD    0
79
80 /* BPM Register Address */
81 enum {
82         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
83         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
84         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
85         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
86         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
87         BPM_REG_FGCG_MAX
88 };
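
/*
 * Hedged usage sketch: the RLC SERDES helper later in this file pairs one of
 * these BPM register indices with SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD,
 * roughly:
 *
 *   gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
 *
 * Treat the exact call shape as illustrative rather than definitive.
 */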
89
90 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
96
97 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
102
103 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
125 {
126         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
127         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
128         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
129         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
130         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
131         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
132         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
133         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
134         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
135         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
136         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
137         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
138         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
139         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
140         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
141         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
142 };
143
144 static const u32 golden_settings_tonga_a11[] =
145 {
146         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
147         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
148         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
149         mmGB_GPU_ID, 0x0000000f, 0x00000000,
150         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
151         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
152         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
153         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
154         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
155         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
156         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
157         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
158         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
159         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
160         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
161 };
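
/*
 * All of the "golden" tables in this file are flat {register, and_mask,
 * or_value} triplets.  A minimal sketch of how they are consumed, to make the
 * layout clear; the canonical loop is amdgpu_program_register_sequence() in
 * amdgpu_device.c and its details may differ slightly:
 *
 *   for (i = 0; i < array_size; i += 3) {
 *           reg      = regs[i + 0];
 *           and_mask = regs[i + 1];
 *           or_val   = regs[i + 2];
 *           if (and_mask == 0xffffffff)
 *                   tmp = or_val;
 *           else
 *                   tmp = (RREG32(reg) & ~and_mask) | or_val;
 *           WREG32(reg, tmp);
 *   }
 */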
162
163 static const u32 tonga_golden_common_all[] =
164 {
165         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
166         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
167         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
168         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
169         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
170         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
171         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
172         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
173 };
174
175 static const u32 tonga_mgcg_cgcg_init[] =
176 {
177         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
178         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
179         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
180         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
181         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
182         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
183         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
184         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
185         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
186         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
187         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
188         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
189         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
190         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
191         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
192         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
193         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
195         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
196         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
197         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
198         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
199         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
203         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
204         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
205         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
207         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
208         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
209         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
210         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
211         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
212         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
213         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
214         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
215         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
216         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
217         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
218         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
219         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
220         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
221         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
222         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
223         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
224         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
225         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
226         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
227         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
228         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
229         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
230         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
231         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
232         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
233         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
234         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
235         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
236         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
237         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
238         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
239         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
240         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
241         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
242         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
243         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
244         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
245         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
246         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
247         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
248         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
249         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
250         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
251         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
252 };
253
254 static const u32 fiji_golden_common_all[] =
255 {
256         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
258         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
259         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
260         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
261         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
262         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
263         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
264         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
265         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
266 };
267
268 static const u32 golden_settings_fiji_a10[] =
269 {
270         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
271         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
272         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
273         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
274         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
276         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
277         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
278         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
279         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
280         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
281 };
282
283 static const u32 fiji_mgcg_cgcg_init[] =
284 {
285         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
286         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
287         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
288         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
289         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
290         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
291         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
292         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
293         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
294         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
295         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
296         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
297         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
298         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
299         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
300         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
301         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
302         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
303         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
304         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
305         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
306         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
307         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
308         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
309         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
310         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
311         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
312         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
313         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
314         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
315         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
316         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
317         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
318         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
319         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
320 };
321
322 static const u32 golden_settings_iceland_a11[] =
323 {
324         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
325         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
326         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
327         mmGB_GPU_ID, 0x0000000f, 0x00000000,
328         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
329         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
330         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
331         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
332         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
333         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
334         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
335         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
336         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
337         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
338         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
339 };
340
341 static const u32 iceland_golden_common_all[] =
342 {
343         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
344         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
345         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
346         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
347         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
348         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
349         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
350         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
351 };
352
353 static const u32 iceland_mgcg_cgcg_init[] =
354 {
355         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
356         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
358         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
360         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
361         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
362         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
364         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
366         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
373         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
374         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
375         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
377         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
378         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
380         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
381         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
382         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
383         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
384         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
385         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
386         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
387         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
388         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
389         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
390         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
391         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
392         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
393         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
394         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
395         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
396         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
397         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
398         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
399         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
400         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
401         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
402         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
403         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
404         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
405         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
406         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
407         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
408         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
409         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
410         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
411         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
412         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
413         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
414         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
415         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
416         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
417         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
418         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
419 };
420
421 static const u32 cz_golden_settings_a11[] =
422 {
423         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
424         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
425         mmGB_GPU_ID, 0x0000000f, 0x00000000,
426         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
427         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
428         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
429         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
430         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
431         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
432         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
433 };
434
435 static const u32 cz_golden_common_all[] =
436 {
437         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
438         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
439         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
440         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
441         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
442         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
443         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
444         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
445 };
446
447 static const u32 cz_mgcg_cgcg_init[] =
448 {
449         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
450         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
451         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
458         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
460         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
463         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
464         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
465         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
467         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
468         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
469         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
470         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
471         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
472         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
473         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
474         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
475         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
476         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
477         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
478         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
479         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
480         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
481         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
482         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
483         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
484         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
485         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
486         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
487         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
488         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
489         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
490         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
491         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
492         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
493         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
494         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
495         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
496         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
497         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
498         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
499         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
500         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
501         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
502         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
503         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
504         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
505         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
506         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
507         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
508         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
509         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
510         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
511         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
512         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
513         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
514         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
515         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
516         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
517         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
518         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
519         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
520         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
521         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
522         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
523         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
524 };
525
526 static const u32 stoney_golden_settings_a11[] =
527 {
528         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
529         mmGB_GPU_ID, 0x0000000f, 0x00000000,
530         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
531         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
532         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
533         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
534         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
535         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
536         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
537         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
538 };
539
540 static const u32 stoney_golden_common_all[] =
541 {
542         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
543         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
544         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
545         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
546         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
547         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
548         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
549         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
550 };
551
552 static const u32 stoney_mgcg_cgcg_init[] =
553 {
554         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
555         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
556         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
557         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
558         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
559         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
560 };
561
562 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
563 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
564 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
565
566 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
567 {
568         switch (adev->asic_type) {
569         case CHIP_TOPAZ:
570                 amdgpu_program_register_sequence(adev,
571                                                  iceland_mgcg_cgcg_init,
572                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
573                 amdgpu_program_register_sequence(adev,
574                                                  golden_settings_iceland_a11,
575                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
576                 amdgpu_program_register_sequence(adev,
577                                                  iceland_golden_common_all,
578                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
579                 break;
580         case CHIP_FIJI:
581                 amdgpu_program_register_sequence(adev,
582                                                  fiji_mgcg_cgcg_init,
583                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
584                 amdgpu_program_register_sequence(adev,
585                                                  golden_settings_fiji_a10,
586                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
587                 amdgpu_program_register_sequence(adev,
588                                                  fiji_golden_common_all,
589                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
590                 break;
591
592         case CHIP_TONGA:
593                 amdgpu_program_register_sequence(adev,
594                                                  tonga_mgcg_cgcg_init,
595                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
596                 amdgpu_program_register_sequence(adev,
597                                                  golden_settings_tonga_a11,
598                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
599                 amdgpu_program_register_sequence(adev,
600                                                  tonga_golden_common_all,
601                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
602                 break;
603         case CHIP_CARRIZO:
604                 amdgpu_program_register_sequence(adev,
605                                                  cz_mgcg_cgcg_init,
606                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
607                 amdgpu_program_register_sequence(adev,
608                                                  cz_golden_settings_a11,
609                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
610                 amdgpu_program_register_sequence(adev,
611                                                  cz_golden_common_all,
612                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
613                 break;
614         case CHIP_STONEY:
615                 amdgpu_program_register_sequence(adev,
616                                                  stoney_mgcg_cgcg_init,
617                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
618                 amdgpu_program_register_sequence(adev,
619                                                  stoney_golden_settings_a11,
620                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
621                 amdgpu_program_register_sequence(adev,
622                                                  stoney_golden_common_all,
623                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
624                 break;
625         default:
626                 break;
627         }
628 }
629
630 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
631 {
632         int i;
633
634         adev->gfx.scratch.num_reg = 7;
635         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
636         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
637                 adev->gfx.scratch.free[i] = true;
638                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
639         }
640 }
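
/*
 * The scratch registers set up above are handed out by
 * amdgpu_gfx_scratch_get()/amdgpu_gfx_scratch_free().  A hedged sketch of the
 * allocator, assuming the free[]/reg[] layout initialised here (the canonical
 * version lives in amdgpu_gfx.c):
 *
 *   for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 *           if (adev->gfx.scratch.free[i]) {
 *                   adev->gfx.scratch.free[i] = false;
 *                   *reg = adev->gfx.scratch.reg[i];
 *                   return 0;
 *           }
 *   }
 *   return -EINVAL;
 */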
641
642 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
643 {
644         struct amdgpu_device *adev = ring->adev;
645         uint32_t scratch;
646         uint32_t tmp = 0;
647         unsigned i;
648         int r;
649
650         r = amdgpu_gfx_scratch_get(adev, &scratch);
651         if (r) {
652                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
653                 return r;
654         }
655         WREG32(scratch, 0xCAFEDEAD);
656         r = amdgpu_ring_lock(ring, 3);
657         if (r) {
658                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
659                           ring->idx, r);
660                 amdgpu_gfx_scratch_free(adev, scratch);
661                 return r;
662         }
663         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
664         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
665         amdgpu_ring_write(ring, 0xDEADBEEF);
666         amdgpu_ring_unlock_commit(ring);
667
668         for (i = 0; i < adev->usec_timeout; i++) {
669                 tmp = RREG32(scratch);
670                 if (tmp == 0xDEADBEEF)
671                         break;
672                 DRM_UDELAY(1);
673         }
674         if (i < adev->usec_timeout) {
675                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
676                          ring->idx, i);
677         } else {
678                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
679                           ring->idx, scratch, tmp);
680                 r = -EINVAL;
681         }
682         amdgpu_gfx_scratch_free(adev, scratch);
683         return r;
684 }
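
/*
 * Packet layout used by the ring test above.  PACKET3(op, n) builds a type-3
 * header whose count field n is the number of payload dwords minus one, so
 * the three dwords emitted are:
 *
 *   PACKET3(PACKET3_SET_UCONFIG_REG, 1)       header, 2 payload dwords follow
 *   scratch - PACKET3_SET_UCONFIG_REG_START   register offset in UCONFIG space
 *   0xDEADBEEF                                value the CP writes back
 *
 * The test then polls the scratch register until the value appears or the
 * usec timeout expires.
 */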
685
686 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
687 {
688         struct amdgpu_device *adev = ring->adev;
689         struct amdgpu_ib ib;
690         struct fence *f = NULL;
691         uint32_t scratch;
692         uint32_t tmp = 0;
693         unsigned i;
694         int r;
695
696         r = amdgpu_gfx_scratch_get(adev, &scratch);
697         if (r) {
698                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
699                 return r;
700         }
701         WREG32(scratch, 0xCAFEDEAD);
702         memset(&ib, 0, sizeof(ib));
703         r = amdgpu_ib_get(ring, NULL, 256, &ib);
704         if (r) {
705                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
706                 goto err1;
707         }
708         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
709         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
710         ib.ptr[2] = 0xDEADBEEF;
711         ib.length_dw = 3;
712
713         r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
714                                                  AMDGPU_FENCE_OWNER_UNDEFINED,
715                                                  &f);
716         if (r)
717                 goto err2;
718
719         r = fence_wait(f, false);
720         if (r) {
721                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
722                 goto err2;
723         }
724         for (i = 0; i < adev->usec_timeout; i++) {
725                 tmp = RREG32(scratch);
726                 if (tmp == 0xDEADBEEF)
727                         break;
728                 DRM_UDELAY(1);
729         }
730         if (i < adev->usec_timeout) {
731                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
732                          ring->idx, i);
733                 goto err2;
734         } else {
735                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
736                           scratch, tmp);
737                 r = -EINVAL;
738         }
739 err2:
740         fence_put(f);
741         amdgpu_ib_free(adev, &ib);
742 err1:
743         amdgpu_gfx_scratch_free(adev, scratch);
744         return r;
745 }
746
747 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
748 {
749         const char *chip_name;
750         char fw_name[30];
751         int err;
752         struct amdgpu_firmware_info *info = NULL;
753         const struct common_firmware_header *header = NULL;
754         const struct gfx_firmware_header_v1_0 *cp_hdr;
755
756         DRM_DEBUG("\n");
757
758         switch (adev->asic_type) {
759         case CHIP_TOPAZ:
760                 chip_name = "topaz";
761                 break;
762         case CHIP_TONGA:
763                 chip_name = "tonga";
764                 break;
765         case CHIP_CARRIZO:
766                 chip_name = "carrizo";
767                 break;
768         case CHIP_FIJI:
769                 chip_name = "fiji";
770                 break;
771         case CHIP_STONEY:
772                 chip_name = "stoney";
773                 break;
774         default:
775                 BUG();
776         }
777
778         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
779         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
780         if (err)
781                 goto out;
782         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
783         if (err)
784                 goto out;
785         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
786         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
787         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
788
789         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
790         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
791         if (err)
792                 goto out;
793         err = amdgpu_ucode_validate(adev->gfx.me_fw);
794         if (err)
795                 goto out;
796         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
797         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
798         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
799
800         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
801         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
802         if (err)
803                 goto out;
804         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
805         if (err)
806                 goto out;
807         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
808         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
809         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
810
811         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
812         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
813         if (err)
814                 goto out;
815         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
816         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
817         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
818         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
819
820         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
821         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
822         if (err)
823                 goto out;
824         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
825         if (err)
826                 goto out;
827         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
828         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
829         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
830
831         if (adev->asic_type != CHIP_STONEY) {
832                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
833                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
834                 if (!err) {
835                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
836                         if (err)
837                                 goto out;
838                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
839                                 adev->gfx.mec2_fw->data;
840                         adev->gfx.mec2_fw_version =
841                                 le32_to_cpu(cp_hdr->header.ucode_version);
842                         adev->gfx.mec2_feature_version =
843                                 le32_to_cpu(cp_hdr->ucode_feature_version);
844                 } else {
845                         err = 0;
846                         adev->gfx.mec2_fw = NULL;
847                 }
848         }
849
850         if (adev->firmware.smu_load) {
851                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
852                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
853                 info->fw = adev->gfx.pfp_fw;
854                 header = (const struct common_firmware_header *)info->fw->data;
855                 adev->firmware.fw_size +=
856                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857
858                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
859                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
860                 info->fw = adev->gfx.me_fw;
861                 header = (const struct common_firmware_header *)info->fw->data;
862                 adev->firmware.fw_size +=
863                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864
865                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
866                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
867                 info->fw = adev->gfx.ce_fw;
868                 header = (const struct common_firmware_header *)info->fw->data;
869                 adev->firmware.fw_size +=
870                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
871
872                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
873                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
874                 info->fw = adev->gfx.rlc_fw;
875                 header = (const struct common_firmware_header *)info->fw->data;
876                 adev->firmware.fw_size +=
877                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
878
879                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
880                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
881                 info->fw = adev->gfx.mec_fw;
882                 header = (const struct common_firmware_header *)info->fw->data;
883                 adev->firmware.fw_size +=
884                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
885
886                 if (adev->gfx.mec2_fw) {
887                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
888                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
889                         info->fw = adev->gfx.mec2_fw;
890                         header = (const struct common_firmware_header *)info->fw->data;
891                         adev->firmware.fw_size +=
892                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
893                 }
894
895         }
896
897 out:
898         if (err) {
899                 dev_err(adev->dev,
900                         "gfx8: Failed to load firmware \"%s\"\n",
901                         fw_name);
902                 release_firmware(adev->gfx.pfp_fw);
903                 adev->gfx.pfp_fw = NULL;
904                 release_firmware(adev->gfx.me_fw);
905                 adev->gfx.me_fw = NULL;
906                 release_firmware(adev->gfx.ce_fw);
907                 adev->gfx.ce_fw = NULL;
908                 release_firmware(adev->gfx.rlc_fw);
909                 adev->gfx.rlc_fw = NULL;
910                 release_firmware(adev->gfx.mec_fw);
911                 adev->gfx.mec_fw = NULL;
912                 release_firmware(adev->gfx.mec2_fw);
913                 adev->gfx.mec2_fw = NULL;
914         }
915         return err;
916 }
917
918 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
919 {
920         int r;
921
922         if (adev->gfx.mec.hpd_eop_obj) {
923                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
924                 if (unlikely(r != 0))
925                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
926                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
927                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
928
929                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
930                 adev->gfx.mec.hpd_eop_obj = NULL;
931         }
932 }
933
934 #define MEC_HPD_SIZE 2048
935
936 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
937 {
938         int r;
939         u32 *hpd;
940
941         /*
942          * we assign only 1 pipe because all other pipes will
943          * be handled by KFD
944          */
945         adev->gfx.mec.num_mec = 1;
946         adev->gfx.mec.num_pipe = 1;
947         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
948
949         if (adev->gfx.mec.hpd_eop_obj == NULL) {
950                 r = amdgpu_bo_create(adev,
951                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
952                                      PAGE_SIZE, true,
953                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
954                                      &adev->gfx.mec.hpd_eop_obj);
955                 if (r) {
956                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
957                         return r;
958                 }
959         }
960
961         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
962         if (unlikely(r != 0)) {
963                 gfx_v8_0_mec_fini(adev);
964                 return r;
965         }
966         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
967                           &adev->gfx.mec.hpd_eop_gpu_addr);
968         if (r) {
969                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
970                 gfx_v8_0_mec_fini(adev);
971                 return r;
972         }
973         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
974         if (r) {
975                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
976                 gfx_v8_0_mec_fini(adev);
977                 return r;
978         }
979
980         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
981
982         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
983         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
984
985         return 0;
986 }
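
/*
 * Sizing note for the buffer allocated above: with num_mec = 1 and
 * num_pipe = 1 this is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes, i.e. a single
 * 4 KiB page of HPD/EOP space for the one pipe this driver manages (the
 * remaining pipes are handled by KFD, as noted above).
 */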
987
988 static const u32 vgpr_init_compute_shader[] =
989 {
990         0x7e000209, 0x7e020208,
991         0x7e040207, 0x7e060206,
992         0x7e080205, 0x7e0a0204,
993         0x7e0c0203, 0x7e0e0202,
994         0x7e100201, 0x7e120200,
995         0x7e140209, 0x7e160208,
996         0x7e180207, 0x7e1a0206,
997         0x7e1c0205, 0x7e1e0204,
998         0x7e200203, 0x7e220202,
999         0x7e240201, 0x7e260200,
1000         0x7e280209, 0x7e2a0208,
1001         0x7e2c0207, 0x7e2e0206,
1002         0x7e300205, 0x7e320204,
1003         0x7e340203, 0x7e360202,
1004         0x7e380201, 0x7e3a0200,
1005         0x7e3c0209, 0x7e3e0208,
1006         0x7e400207, 0x7e420206,
1007         0x7e440205, 0x7e460204,
1008         0x7e480203, 0x7e4a0202,
1009         0x7e4c0201, 0x7e4e0200,
1010         0x7e500209, 0x7e520208,
1011         0x7e540207, 0x7e560206,
1012         0x7e580205, 0x7e5a0204,
1013         0x7e5c0203, 0x7e5e0202,
1014         0x7e600201, 0x7e620200,
1015         0x7e640209, 0x7e660208,
1016         0x7e680207, 0x7e6a0206,
1017         0x7e6c0205, 0x7e6e0204,
1018         0x7e700203, 0x7e720202,
1019         0x7e740201, 0x7e760200,
1020         0x7e780209, 0x7e7a0208,
1021         0x7e7c0207, 0x7e7e0206,
1022         0xbf8a0000, 0xbf810000,
1023 };
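
/*
 * The shader blobs above and below are raw GCN machine code (the kernel build
 * has no shader assembler).  Broadly, the VGPR variant copies values into a
 * long run of successive VGPRs and the SGPR variants do the same for SGPRs,
 * each ending with s_endpgm (0xbf810000); the exact instruction decoding is
 * not needed to follow the dispatch logic below.
 */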
1024
1025 static const u32 sgpr_init_compute_shader[] =
1026 {
1027         0xbe8a0100, 0xbe8c0102,
1028         0xbe8e0104, 0xbe900106,
1029         0xbe920108, 0xbe940100,
1030         0xbe960102, 0xbe980104,
1031         0xbe9a0106, 0xbe9c0108,
1032         0xbe9e0100, 0xbea00102,
1033         0xbea20104, 0xbea40106,
1034         0xbea60108, 0xbea80100,
1035         0xbeaa0102, 0xbeac0104,
1036         0xbeae0106, 0xbeb00108,
1037         0xbeb20100, 0xbeb40102,
1038         0xbeb60104, 0xbeb80106,
1039         0xbeba0108, 0xbebc0100,
1040         0xbebe0102, 0xbec00104,
1041         0xbec20106, 0xbec40108,
1042         0xbec60100, 0xbec80102,
1043         0xbee60004, 0xbee70005,
1044         0xbeea0006, 0xbeeb0007,
1045         0xbee80008, 0xbee90009,
1046         0xbefc0000, 0xbf8a0000,
1047         0xbf810000, 0x00000000,
1048 };
1049
1050 static const u32 vgpr_init_regs[] =
1051 {
1052         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1053         mmCOMPUTE_RESOURCE_LIMITS, 0,
1054         mmCOMPUTE_NUM_THREAD_X, 256*4,
1055         mmCOMPUTE_NUM_THREAD_Y, 1,
1056         mmCOMPUTE_NUM_THREAD_Z, 1,
1057         mmCOMPUTE_PGM_RSRC2, 20,
1058         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1059         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1060         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1061         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1062         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1063         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1064         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1065         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1066         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1067         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1068 };
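
/*
 * Unlike the golden-register triplets, these init tables are flat
 * {register, value} pairs; gfx_v8_0_do_edc_gpr_workarounds() below turns each
 * pair into a three-dword PACKET3_SET_SH_REG write before the dispatch.
 * COMPUTE_STATIC_THREAD_MGMT_SE0 is the per-SE CU mask: all CUs for the VGPR
 * pass, the low and high CU groups (0x0f / 0xf0) for the two SGPR passes.
 */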
1069
1070 static const u32 sgpr1_init_regs[] =
1071 {
1072         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1073         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1074         mmCOMPUTE_NUM_THREAD_X, 256*5,
1075         mmCOMPUTE_NUM_THREAD_Y, 1,
1076         mmCOMPUTE_NUM_THREAD_Z, 1,
1077         mmCOMPUTE_PGM_RSRC2, 20,
1078         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1079         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1080         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1081         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1082         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1083         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1084         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1085         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1086         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1087         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1088 };
1089
1090 static const u32 sgpr2_init_regs[] =
1091 {
1092         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1093         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1094         mmCOMPUTE_NUM_THREAD_X, 256*5,
1095         mmCOMPUTE_NUM_THREAD_Y, 1,
1096         mmCOMPUTE_NUM_THREAD_Z, 1,
1097         mmCOMPUTE_PGM_RSRC2, 20,
1098         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1099         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1100         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1101         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1102         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1103         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1104         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1105         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1106         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1107         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1108 };
1109
1110 static const u32 sec_ded_counter_registers[] =
1111 {
1112         mmCPC_EDC_ATC_CNT,
1113         mmCPC_EDC_SCRATCH_CNT,
1114         mmCPC_EDC_UCODE_CNT,
1115         mmCPF_EDC_ATC_CNT,
1116         mmCPF_EDC_ROQ_CNT,
1117         mmCPF_EDC_TAG_CNT,
1118         mmCPG_EDC_ATC_CNT,
1119         mmCPG_EDC_DMA_CNT,
1120         mmCPG_EDC_TAG_CNT,
1121         mmDC_EDC_CSINVOC_CNT,
1122         mmDC_EDC_RESTORE_CNT,
1123         mmDC_EDC_STATE_CNT,
1124         mmGDS_EDC_CNT,
1125         mmGDS_EDC_GRBM_CNT,
1126         mmGDS_EDC_OA_DED,
1127         mmSPI_EDC_CNT,
1128         mmSQC_ATC_EDC_GATCL1_CNT,
1129         mmSQC_EDC_CNT,
1130         mmSQ_EDC_DED_CNT,
1131         mmSQ_EDC_INFO,
1132         mmSQ_EDC_SEC_CNT,
1133         mmTCC_EDC_CNT,
1134         mmTCP_ATC_EDC_GATCL1_CNT,
1135         mmTCP_EDC_CNT,
1136         mmTD_EDC_CNT
1137 };
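
/*
 * SEC/DED = single-error-correct / double-error-detect.  Once the three GPR
 * init dispatches below have completed, these EDC counters are read back once
 * to clear them, along the lines of:
 *
 *   for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
 *           RREG32(sec_ded_counter_registers[i]);
 *
 * (the read-back loop sits later in gfx_v8_0_do_edc_gpr_workarounds()).
 */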
1138
1139 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1140 {
1141         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1142         struct amdgpu_ib ib;
1143         struct fence *f = NULL;
1144         int r, i;
1145         u32 tmp;
1146         unsigned total_size, vgpr_offset, sgpr_offset;
1147         u64 gpu_addr;
1148
1149         /* only supported on CZ */
1150         if (adev->asic_type != CHIP_CARRIZO)
1151                 return 0;
1152
1153         /* bail if the compute ring is not ready */
1154         if (!ring->ready)
1155                 return 0;
1156
1157         tmp = RREG32(mmGB_EDC_MODE);
1158         WREG32(mmGB_EDC_MODE, 0);
1159
1160         total_size =
1161                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162         total_size +=
1163                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1164         total_size +=
1165                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1166         total_size = ALIGN(total_size, 256);
1167         vgpr_offset = total_size;
1168         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1169         sgpr_offset = total_size;
1170         total_size += sizeof(sgpr_init_compute_shader);
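
        /*
         * Sizing, spelled out: each {reg, value} pair becomes a three-dword
         * SET_SH_REG write, plus 4 dwords for the COMPUTE_PGM_LO/HI write,
         * 5 for the DISPATCH_DIRECT packet and 2 for the EVENT_WRITE, all
         * multiplied by 4 bytes per dword.  The shader blobs are appended at
         * 256-byte aligned offsets so that COMPUTE_PGM_LO/HI can take the
         * 256-byte-shifted (>> 8) address computed below.
         */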
1171
1172         /* allocate an indirect buffer to put the commands in */
1173         memset(&ib, 0, sizeof(ib));
1174         r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1175         if (r) {
1176                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1177                 return r;
1178         }
1179
1180         /* load the compute shaders */
1181         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1182                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1183
1184         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1185                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1186
1187         /* init the ib length to 0 */
1188         ib.length_dw = 0;
1189
1190         /* VGPR */
1191         /* write the register state for the compute dispatch */
1192         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1193                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1194                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1195                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1196         }
1197         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1198         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1199         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1200         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1201         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1202         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1203
1204         /* write dispatch packet */
1205         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1206         ib.ptr[ib.length_dw++] = 8; /* x */
1207         ib.ptr[ib.length_dw++] = 1; /* y */
1208         ib.ptr[ib.length_dw++] = 1; /* z */
1209         ib.ptr[ib.length_dw++] =
1210                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1211
1212         /* write CS partial flush packet */
1213         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1214         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1215
1216         /* SGPR1 */
1217         /* write the register state for the compute dispatch */
1218         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1219                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1220                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1221                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1222         }
1223         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1224         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1225         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1226         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1227         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1228         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1229
1230         /* write dispatch packet */
1231         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1232         ib.ptr[ib.length_dw++] = 8; /* x */
1233         ib.ptr[ib.length_dw++] = 1; /* y */
1234         ib.ptr[ib.length_dw++] = 1; /* z */
1235         ib.ptr[ib.length_dw++] =
1236                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1237
1238         /* write CS partial flush packet */
1239         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1240         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1241
1242         /* SGPR2 */
1243         /* write the register state for the compute dispatch */
1244         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1245                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1246                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1247                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1248         }
1249         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1250         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1251         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1252         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1253         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1254         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1255
1256         /* write dispatch packet */
1257         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1258         ib.ptr[ib.length_dw++] = 8; /* x */
1259         ib.ptr[ib.length_dw++] = 1; /* y */
1260         ib.ptr[ib.length_dw++] = 1; /* z */
1261         ib.ptr[ib.length_dw++] =
1262                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1263
1264         /* write CS partial flush packet */
1265         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1266         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1267
1268         /* schedule the ib on the ring */
1269         r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1270                                                  AMDGPU_FENCE_OWNER_UNDEFINED,
1271                                                  &f);
1272         if (r) {
1273                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1274                 goto fail;
1275         }
1276
1277         /* wait for the GPU to finish processing the IB */
1278         r = fence_wait(f, false);
1279         if (r) {
1280                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1281                 goto fail;
1282         }
1283
1284         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1285         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1286         WREG32(mmGB_EDC_MODE, tmp);
1287
1288         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1289         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1290         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1291
1292
1293         /* read back registers to clear the counters */
1294         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1295                 RREG32(sec_ded_counter_registers[i]);
1296
1297 fail:
1298         fence_put(f);
1299         amdgpu_ib_free(adev, &ib);
1300
1301         return r;
1302 }
1303
1304 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1305 {
1306         u32 gb_addr_config;
1307         u32 mc_shared_chmap, mc_arb_ramcfg;
1308         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1309         u32 tmp;
1310
1311         switch (adev->asic_type) {
1312         case CHIP_TOPAZ:
1313                 adev->gfx.config.max_shader_engines = 1;
1314                 adev->gfx.config.max_tile_pipes = 2;
1315                 adev->gfx.config.max_cu_per_sh = 6;
1316                 adev->gfx.config.max_sh_per_se = 1;
1317                 adev->gfx.config.max_backends_per_se = 2;
1318                 adev->gfx.config.max_texture_channel_caches = 2;
1319                 adev->gfx.config.max_gprs = 256;
1320                 adev->gfx.config.max_gs_threads = 32;
1321                 adev->gfx.config.max_hw_contexts = 8;
1322
1323                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1324                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1325                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1326                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1327                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1328                 break;
1329         case CHIP_FIJI:
1330                 adev->gfx.config.max_shader_engines = 4;
1331                 adev->gfx.config.max_tile_pipes = 16;
1332                 adev->gfx.config.max_cu_per_sh = 16;
1333                 adev->gfx.config.max_sh_per_se = 1;
1334                 adev->gfx.config.max_backends_per_se = 4;
1335                 adev->gfx.config.max_texture_channel_caches = 16;
1336                 adev->gfx.config.max_gprs = 256;
1337                 adev->gfx.config.max_gs_threads = 32;
1338                 adev->gfx.config.max_hw_contexts = 8;
1339
1340                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1344                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1345                 break;
1346         case CHIP_TONGA:
1347                 adev->gfx.config.max_shader_engines = 4;
1348                 adev->gfx.config.max_tile_pipes = 8;
1349                 adev->gfx.config.max_cu_per_sh = 8;
1350                 adev->gfx.config.max_sh_per_se = 1;
1351                 adev->gfx.config.max_backends_per_se = 2;
1352                 adev->gfx.config.max_texture_channel_caches = 8;
1353                 adev->gfx.config.max_gprs = 256;
1354                 adev->gfx.config.max_gs_threads = 32;
1355                 adev->gfx.config.max_hw_contexts = 8;
1356
1357                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1361                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1362                 break;
1363         case CHIP_CARRIZO:
1364                 adev->gfx.config.max_shader_engines = 1;
1365                 adev->gfx.config.max_tile_pipes = 2;
1366                 adev->gfx.config.max_sh_per_se = 1;
1367                 adev->gfx.config.max_backends_per_se = 2;
1368
1369                 switch (adev->pdev->revision) {
1370                 case 0xc4:
1371                 case 0x84:
1372                 case 0xc8:
1373                 case 0xcc:
1374                 case 0xe1:
1375                 case 0xe3:
1376                         /* B10 */
1377                         adev->gfx.config.max_cu_per_sh = 8;
1378                         break;
1379                 case 0xc5:
1380                 case 0x81:
1381                 case 0x85:
1382                 case 0xc9:
1383                 case 0xcd:
1384                 case 0xe2:
1385                 case 0xe4:
1386                         /* B8 */
1387                         adev->gfx.config.max_cu_per_sh = 6;
1388                         break;
1389                 case 0xc6:
1390                 case 0xca:
1391                 case 0xce:
1392                 case 0x88:
1393                         /* B6 */
1394                         adev->gfx.config.max_cu_per_sh = 6;
1395                         break;
1396                 case 0xc7:
1397                 case 0x87:
1398                 case 0xcb:
1399                 case 0xe5:
1400                 case 0x89:
1401                 default:
1402                         /* B4 */
1403                         adev->gfx.config.max_cu_per_sh = 4;
1404                         break;
1405                 }
1406
1407                 adev->gfx.config.max_texture_channel_caches = 2;
1408                 adev->gfx.config.max_gprs = 256;
1409                 adev->gfx.config.max_gs_threads = 32;
1410                 adev->gfx.config.max_hw_contexts = 8;
1411
1412                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1413                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1414                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1415                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1416                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1417                 break;
1418         case CHIP_STONEY:
1419                 adev->gfx.config.max_shader_engines = 1;
1420                 adev->gfx.config.max_tile_pipes = 2;
1421                 adev->gfx.config.max_sh_per_se = 1;
1422                 adev->gfx.config.max_backends_per_se = 1;
1423
1424                 switch (adev->pdev->revision) {
1425                 case 0xc0:
1426                 case 0xc1:
1427                 case 0xc2:
1428                 case 0xc4:
1429                 case 0xc8:
1430                 case 0xc9:
1431                         adev->gfx.config.max_cu_per_sh = 3;
1432                         break;
1433                 case 0xd0:
1434                 case 0xd1:
1435                 case 0xd2:
1436                 default:
1437                         adev->gfx.config.max_cu_per_sh = 2;
1438                         break;
1439                 }
1440
1441                 adev->gfx.config.max_texture_channel_caches = 2;
1442                 adev->gfx.config.max_gprs = 256;
1443                 adev->gfx.config.max_gs_threads = 16;
1444                 adev->gfx.config.max_hw_contexts = 8;
1445
1446                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1447                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1448                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1449                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1450                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1451                 break;
1452         default:
1453                 adev->gfx.config.max_shader_engines = 2;
1454                 adev->gfx.config.max_tile_pipes = 4;
1455                 adev->gfx.config.max_cu_per_sh = 2;
1456                 adev->gfx.config.max_sh_per_se = 1;
1457                 adev->gfx.config.max_backends_per_se = 2;
1458                 adev->gfx.config.max_texture_channel_caches = 4;
1459                 adev->gfx.config.max_gprs = 256;
1460                 adev->gfx.config.max_gs_threads = 32;
1461                 adev->gfx.config.max_hw_contexts = 8;
1462
1463                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1464                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1465                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1466                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1467                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1468                 break;
1469         }
1470
1471         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1472         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1473         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1474
1475         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1476         adev->gfx.config.mem_max_burst_length_bytes = 256;
1477         if (adev->flags & AMD_IS_APU) {
1478                 /* Get memory bank mapping mode. */
1479                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1480                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1481                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1482
1483                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1484                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1485                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1486
1487                 /* Validate settings in case only one DIMM is installed. */
1488                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1489                         dimm00_addr_map = 0;
1490                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1491                         dimm01_addr_map = 0;
1492                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1493                         dimm10_addr_map = 0;
1494                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1495                         dimm11_addr_map = 0;
1496
1497                 /* If the DIMM Addr map is 8GB, ROW size should be 2KB, otherwise 1KB. */
1498                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1499                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1500                         adev->gfx.config.mem_row_size_in_kb = 2;
1501                 else
1502                         adev->gfx.config.mem_row_size_in_kb = 1;
1503         } else {
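                /* dGPU: row size in bytes is 4 * 2^(8 + NOOFCOLS), converted to KB and capped at 4 below */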
1504                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1505                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1506                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1507                         adev->gfx.config.mem_row_size_in_kb = 4;
1508         }
1509
1510         adev->gfx.config.shader_engine_tile_size = 32;
1511         adev->gfx.config.num_gpus = 1;
1512         adev->gfx.config.multi_gpu_tile_size = 64;
1513
1514         /* fix up row size */
1515         switch (adev->gfx.config.mem_row_size_in_kb) {
1516         case 1:
1517         default:
1518                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1519                 break;
1520         case 2:
1521                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1522                 break;
1523         case 4:
1524                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1525                 break;
1526         }
1527         adev->gfx.config.gb_addr_config = gb_addr_config;
1528 }
1529
1530 static int gfx_v8_0_sw_init(void *handle)
1531 {
1532         int i, r;
1533         struct amdgpu_ring *ring;
1534         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1535
1536         /* EOP Event */
1537         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1538         if (r)
1539                 return r;
1540
1541         /* Privileged reg */
1542         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1543         if (r)
1544                 return r;
1545
1546         /* Privileged inst */
1547         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1548         if (r)
1549                 return r;
1550
1551         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1552
1553         gfx_v8_0_scratch_init(adev);
1554
1555         r = gfx_v8_0_init_microcode(adev);
1556         if (r) {
1557                 DRM_ERROR("Failed to load gfx firmware!\n");
1558                 return r;
1559         }
1560
1561         r = gfx_v8_0_mec_init(adev);
1562         if (r) {
1563                 DRM_ERROR("Failed to init MEC BOs!\n");
1564                 return r;
1565         }
1566
1567         /* set up the gfx ring */
1568         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1569                 ring = &adev->gfx.gfx_ring[i];
1570                 ring->ring_obj = NULL;
1571                 sprintf(ring->name, "gfx");
1572                 /* no gfx doorbells on iceland */
1573                 if (adev->asic_type != CHIP_TOPAZ) {
1574                         ring->use_doorbell = true;
1575                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1576                 }
1577
1578                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1579                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1580                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1581                                      AMDGPU_RING_TYPE_GFX);
1582                 if (r)
1583                         return r;
1584         }
1585
1586         /* set up the compute queues */
1587         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1588                 unsigned irq_type;
1589
1590                 /* max 32 queues per MEC */
1591                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1592                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1593                         break;
1594                 }
1595                 ring = &adev->gfx.compute_ring[i];
1596                 ring->ring_obj = NULL;
1597                 ring->use_doorbell = true;
1598                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1599                 ring->me = 1; /* first MEC */
1600                 ring->pipe = i / 8;
1601                 ring->queue = i % 8;
1602                 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1603                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1604                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1605                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1606                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1607                                      &adev->gfx.eop_irq, irq_type,
1608                                      AMDGPU_RING_TYPE_COMPUTE);
1609                 if (r)
1610                         return r;
1611         }
1612
1613         /* reserve GDS, GWS and OA resources for gfx */
1614         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1615                         PAGE_SIZE, true,
1616                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1617                         NULL, &adev->gds.gds_gfx_bo);
1618         if (r)
1619                 return r;
1620
1621         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1622                 PAGE_SIZE, true,
1623                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1624                 NULL, &adev->gds.gws_gfx_bo);
1625         if (r)
1626                 return r;
1627
1628         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1629                         PAGE_SIZE, true,
1630                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1631                         NULL, &adev->gds.oa_gfx_bo);
1632         if (r)
1633                 return r;
1634
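        /* 0x8000 bytes = 32 KB of constant engine (CE) RAM */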
1635         adev->gfx.ce_ram_size = 0x8000;
1636
1637         gfx_v8_0_gpu_early_init(adev);
1638
1639         return 0;
1640 }
1641
1642 static int gfx_v8_0_sw_fini(void *handle)
1643 {
1644         int i;
1645         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646
1647         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1650
1651         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1655
1656         gfx_v8_0_mec_fini(adev);
1657
1658         return 0;
1659 }
1660
1661 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1662 {
1663         uint32_t *modearray, *mod2array;
1664         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1666         u32 reg_offset;
1667
1668         modearray = adev->gfx.config.tile_mode_array;
1669         mod2array = adev->gfx.config.macrotile_mode_array;
1670
1671         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672                 modearray[reg_offset] = 0;
1673
1674         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1675                 mod2array[reg_offset] = 0;
1676
1677         switch (adev->asic_type) {
1678         case CHIP_TOPAZ:
1679                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680                                 PIPE_CONFIG(ADDR_SURF_P2) |
1681                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684                                 PIPE_CONFIG(ADDR_SURF_P2) |
1685                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688                                 PIPE_CONFIG(ADDR_SURF_P2) |
1689                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692                                 PIPE_CONFIG(ADDR_SURF_P2) |
1693                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696                                 PIPE_CONFIG(ADDR_SURF_P2) |
1697                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700                                 PIPE_CONFIG(ADDR_SURF_P2) |
1701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704                                 PIPE_CONFIG(ADDR_SURF_P2) |
1705                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708                                 PIPE_CONFIG(ADDR_SURF_P2));
1709                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710                                 PIPE_CONFIG(ADDR_SURF_P2) |
1711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714                                  PIPE_CONFIG(ADDR_SURF_P2) |
1715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718                                  PIPE_CONFIG(ADDR_SURF_P2) |
1719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722                                  PIPE_CONFIG(ADDR_SURF_P2) |
1723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726                                  PIPE_CONFIG(ADDR_SURF_P2) |
1727                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730                                  PIPE_CONFIG(ADDR_SURF_P2) |
1731                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734                                  PIPE_CONFIG(ADDR_SURF_P2) |
1735                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738                                  PIPE_CONFIG(ADDR_SURF_P2) |
1739                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742                                  PIPE_CONFIG(ADDR_SURF_P2) |
1743                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746                                  PIPE_CONFIG(ADDR_SURF_P2) |
1747                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750                                  PIPE_CONFIG(ADDR_SURF_P2) |
1751                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754                                  PIPE_CONFIG(ADDR_SURF_P2) |
1755                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758                                  PIPE_CONFIG(ADDR_SURF_P2) |
1759                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762                                  PIPE_CONFIG(ADDR_SURF_P2) |
1763                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766                                  PIPE_CONFIG(ADDR_SURF_P2) |
1767                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770                                  PIPE_CONFIG(ADDR_SURF_P2) |
1771                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774                                  PIPE_CONFIG(ADDR_SURF_P2) |
1775                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778                                  PIPE_CONFIG(ADDR_SURF_P2) |
1779                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781
1782                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785                                 NUM_BANKS(ADDR_SURF_8_BANK));
1786                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789                                 NUM_BANKS(ADDR_SURF_8_BANK));
1790                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793                                 NUM_BANKS(ADDR_SURF_8_BANK));
1794                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797                                 NUM_BANKS(ADDR_SURF_8_BANK));
1798                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801                                 NUM_BANKS(ADDR_SURF_8_BANK));
1802                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805                                 NUM_BANKS(ADDR_SURF_8_BANK));
1806                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809                                 NUM_BANKS(ADDR_SURF_8_BANK));
1810                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813                                 NUM_BANKS(ADDR_SURF_16_BANK));
1814                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817                                 NUM_BANKS(ADDR_SURF_16_BANK));
1818                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821                                  NUM_BANKS(ADDR_SURF_16_BANK));
1822                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825                                  NUM_BANKS(ADDR_SURF_16_BANK));
1826                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829                                  NUM_BANKS(ADDR_SURF_16_BANK));
1830                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833                                  NUM_BANKS(ADDR_SURF_16_BANK));
1834                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837                                  NUM_BANKS(ADDR_SURF_8_BANK));
1838
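                /*
                 * Tile mode slots 7, 12, 17 and 23 are not programmed here;
                 * on the larger VI parts those slots hold the ADDR_SURF_P4_16x16
                 * PRT variants, which a 2-pipe Topaz config does not use.
                 */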
1839                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1841                             reg_offset != 23)
1842                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1843
1844                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845                         if (reg_offset != 7)
1846                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1847
1848                 break;
1849         case CHIP_FIJI:
1850                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1972
1973                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976                                 NUM_BANKS(ADDR_SURF_8_BANK));
1977                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980                                 NUM_BANKS(ADDR_SURF_8_BANK));
1981                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984                                 NUM_BANKS(ADDR_SURF_8_BANK));
1985                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988                                 NUM_BANKS(ADDR_SURF_8_BANK));
1989                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992                                 NUM_BANKS(ADDR_SURF_8_BANK));
1993                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996                                 NUM_BANKS(ADDR_SURF_8_BANK));
1997                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000                                 NUM_BANKS(ADDR_SURF_8_BANK));
2001                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004                                 NUM_BANKS(ADDR_SURF_8_BANK));
2005                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008                                 NUM_BANKS(ADDR_SURF_8_BANK));
2009                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012                                  NUM_BANKS(ADDR_SURF_8_BANK));
2013                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016                                  NUM_BANKS(ADDR_SURF_8_BANK));
2017                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020                                  NUM_BANKS(ADDR_SURF_8_BANK));
2021                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024                                  NUM_BANKS(ADDR_SURF_8_BANK));
2025                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028                                  NUM_BANKS(ADDR_SURF_4_BANK));
2029
2030                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2032
2033                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034                         if (reg_offset != 7)
2035                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2036
2037                 break;
2038         case CHIP_TONGA:
2039                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161
2162                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165                                 NUM_BANKS(ADDR_SURF_16_BANK));
2166                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169                                 NUM_BANKS(ADDR_SURF_16_BANK));
2170                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173                                 NUM_BANKS(ADDR_SURF_16_BANK));
2174                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177                                 NUM_BANKS(ADDR_SURF_16_BANK));
2178                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181                                 NUM_BANKS(ADDR_SURF_16_BANK));
2182                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185                                 NUM_BANKS(ADDR_SURF_16_BANK));
2186                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189                                 NUM_BANKS(ADDR_SURF_16_BANK));
2190                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193                                 NUM_BANKS(ADDR_SURF_16_BANK));
2194                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197                                 NUM_BANKS(ADDR_SURF_16_BANK));
2198                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201                                  NUM_BANKS(ADDR_SURF_16_BANK));
2202                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205                                  NUM_BANKS(ADDR_SURF_16_BANK));
2206                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209                                  NUM_BANKS(ADDR_SURF_8_BANK));
2210                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213                                  NUM_BANKS(ADDR_SURF_4_BANK));
2214                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217                                  NUM_BANKS(ADDR_SURF_4_BANK));
2218
2219                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2221
2222                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223                         if (reg_offset != 7)
2224                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2225
2226                 break;
2227         case CHIP_STONEY:
2228                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P2) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                 PIPE_CONFIG(ADDR_SURF_P2) |
2238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                 PIPE_CONFIG(ADDR_SURF_P2) |
2242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P2) |
2246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                 PIPE_CONFIG(ADDR_SURF_P2) |
2250                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2) |
2254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257                                 PIPE_CONFIG(ADDR_SURF_P2));
2258                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259                                 PIPE_CONFIG(ADDR_SURF_P2) |
2260                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P2) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P2) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P2) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275                                  PIPE_CONFIG(ADDR_SURF_P2) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P2) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283                                  PIPE_CONFIG(ADDR_SURF_P2) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P2) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P2) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P2) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P2) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P2) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P2) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311                                  PIPE_CONFIG(ADDR_SURF_P2) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315                                  PIPE_CONFIG(ADDR_SURF_P2) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P2) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P2) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327                                  PIPE_CONFIG(ADDR_SURF_P2) |
2328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330
2331                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362                                 NUM_BANKS(ADDR_SURF_16_BANK));
2363                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366                                 NUM_BANKS(ADDR_SURF_16_BANK));
2367                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370                                  NUM_BANKS(ADDR_SURF_16_BANK));
2371                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374                                  NUM_BANKS(ADDR_SURF_16_BANK));
2375                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378                                  NUM_BANKS(ADDR_SURF_16_BANK));
2379                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382                                  NUM_BANKS(ADDR_SURF_16_BANK));
2383                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                                  NUM_BANKS(ADDR_SURF_8_BANK));
2387
2388                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390                             reg_offset != 23)
2391                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392
2393                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394                         if (reg_offset != 7)
2395                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2396
2397                 break;
2398         default:
2399                 dev_warn(adev->dev,
2400                          "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
2401                          adev->asic_type);
2402
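        /* fall through to the CHIP_CARRIZO defaults */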
2403         case CHIP_CARRIZO:
2404                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                 PIPE_CONFIG(ADDR_SURF_P2) |
2406                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                 PIPE_CONFIG(ADDR_SURF_P2) |
2410                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P2) |
2414                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P2) |
2418                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P2) |
2422                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P2) |
2426                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                 PIPE_CONFIG(ADDR_SURF_P2) |
2430                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433                                 PIPE_CONFIG(ADDR_SURF_P2));
2434                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P2) |
2436                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                  PIPE_CONFIG(ADDR_SURF_P2) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                  PIPE_CONFIG(ADDR_SURF_P2) |
2444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447                                  PIPE_CONFIG(ADDR_SURF_P2) |
2448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                  PIPE_CONFIG(ADDR_SURF_P2) |
2452                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P2) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                  PIPE_CONFIG(ADDR_SURF_P2) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463                                  PIPE_CONFIG(ADDR_SURF_P2) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467                                  PIPE_CONFIG(ADDR_SURF_P2) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471                                  PIPE_CONFIG(ADDR_SURF_P2) |
2472                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475                                  PIPE_CONFIG(ADDR_SURF_P2) |
2476                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479                                  PIPE_CONFIG(ADDR_SURF_P2) |
2480                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483                                  PIPE_CONFIG(ADDR_SURF_P2) |
2484                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487                                  PIPE_CONFIG(ADDR_SURF_P2) |
2488                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491                                  PIPE_CONFIG(ADDR_SURF_P2) |
2492                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495                                  PIPE_CONFIG(ADDR_SURF_P2) |
2496                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                                  PIPE_CONFIG(ADDR_SURF_P2) |
2500                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503                                  PIPE_CONFIG(ADDR_SURF_P2) |
2504                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2506
2507                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                 NUM_BANKS(ADDR_SURF_8_BANK));
2511                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                 NUM_BANKS(ADDR_SURF_8_BANK));
2515                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                 NUM_BANKS(ADDR_SURF_8_BANK));
2519                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                 NUM_BANKS(ADDR_SURF_8_BANK));
2523                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526                                 NUM_BANKS(ADDR_SURF_8_BANK));
2527                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530                                 NUM_BANKS(ADDR_SURF_8_BANK));
2531                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534                                 NUM_BANKS(ADDR_SURF_8_BANK));
2535                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                 NUM_BANKS(ADDR_SURF_16_BANK));
2539                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542                                 NUM_BANKS(ADDR_SURF_16_BANK));
2543                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546                                  NUM_BANKS(ADDR_SURF_16_BANK));
2547                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550                                  NUM_BANKS(ADDR_SURF_16_BANK));
2551                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558                                  NUM_BANKS(ADDR_SURF_16_BANK));
2559                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562                                  NUM_BANKS(ADDR_SURF_8_BANK));
2563
2564                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2566                             reg_offset != 23)
2567                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2568
2569                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570                         if (reg_offset != 7)
2571                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2572
2573                 break;
2574         }
2575 }
2576
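/* build a mask with the low bit_width bits set; the 64-bit shift keeps the
 * result well defined even when bit_width is 32 */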
2577 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578 {
2579         return (u32)((1ULL << bit_width) - 1);
2580 }
2581
2582 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583 {
2584         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2585
2586         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589         } else if (se_num == 0xffffffff) {
2590                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592         } else if (sh_num == 0xffffffff) {
2593                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2595         } else {
2596                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2598         }
2599         WREG32(mmGRBM_GFX_INDEX, data);
2600 }
2601
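/* return the mask of render backends disabled (in hardware or by the user)
 * for the SE/SH currently selected via gfx_v8_0_select_se_sh() */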
2602 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2603                                     u32 max_rb_num_per_se,
2604                                     u32 sh_per_se)
2605 {
2606         u32 data, mask;
2607
2608         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2609         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2610
2611         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2612
2613         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2614
2615         mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2616
2617         return data & mask;
2618 }
2619
2620 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
2621                               u32 se_num, u32 sh_per_se,
2622                               u32 max_rb_num_per_se)
2623 {
2624         int i, j;
2625         u32 data, mask;
2626         u32 disabled_rbs = 0;
2627         u32 enabled_rbs = 0;
2628
2629         mutex_lock(&adev->grbm_idx_mutex);
2630         for (i = 0; i < se_num; i++) {
2631                 for (j = 0; j < sh_per_se; j++) {
2632                         gfx_v8_0_select_se_sh(adev, i, j);
2633                         data = gfx_v8_0_get_rb_disabled(adev,
2634                                               max_rb_num_per_se, sh_per_se);
2635                         disabled_rbs |= data << ((i * sh_per_se + j) *
2636                                                  RB_BITMAP_WIDTH_PER_SH);
2637                 }
2638         }
2639         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2640         mutex_unlock(&adev->grbm_idx_mutex);
2641
2642         mask = 1;
2643         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2644                 if (!(disabled_rbs & mask))
2645                         enabled_rbs |= mask;
2646                 mask <<= 1;
2647         }
2648
2649         adev->gfx.config.backend_enable_mask = enabled_rbs;
2650
2651         mutex_lock(&adev->grbm_idx_mutex);
2652         for (i = 0; i < se_num; i++) {
2653                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2654                 data = RREG32(mmPA_SC_RASTER_CONFIG);
2655                 for (j = 0; j < sh_per_se; j++) {
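                        /* consume two bits of enabled_rbs per SH to pick the
                         * RB mapping written into PA_SC_RASTER_CONFIG */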
2656                         switch (enabled_rbs & 3) {
2657                         case 0:
2658                                 if (j == 0)
2659                                         data |= (RASTER_CONFIG_RB_MAP_3 <<
2660                                                  PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2661                                 else
2662                                         data |= (RASTER_CONFIG_RB_MAP_0 <<
2663                                                  PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2664                                 break;
2665                         case 1:
2666                                 data |= (RASTER_CONFIG_RB_MAP_0 <<
2667                                          (i * sh_per_se + j) * 2);
2668                                 break;
2669                         case 2:
2670                                 data |= (RASTER_CONFIG_RB_MAP_3 <<
2671                                          (i * sh_per_se + j) * 2);
2672                                 break;
2673                         case 3:
2674                         default:
2675                                 data |= (RASTER_CONFIG_RB_MAP_2 <<
2676                                          (i * sh_per_se + j) * 2);
2677                                 break;
2678                         }
2679                         enabled_rbs >>= 2;
2680                 }
2681                 WREG32(mmPA_SC_RASTER_CONFIG, data);
2682         }
2683         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2684         mutex_unlock(&adev->grbm_idx_mutex);
2685 }
2686
2687 /**
2688  * gfx_v8_0_init_compute_vmid - init compute vmid
2689  *
2690  * @adev: amdgpu_device pointer
2691  *
2692  * Initialize the SH_MEM registers for the compute VMIDs (8-15)
2693  *
2694  */
2695 #define DEFAULT_SH_MEM_BASES    (0x6000)
2696 #define FIRST_COMPUTE_VMID      (8)
2697 #define LAST_COMPUTE_VMID       (16)
2698 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2699 {
2700         int i;
2701         uint32_t sh_mem_config;
2702         uint32_t sh_mem_bases;
2703
2704         /*
2705          * Configure apertures:
2706          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2707          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2708          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2709          */
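        /* SH_MEM_BASES packs the shared and private aperture bases as two
         * 16-bit fields (the top 16 bits of each 64-bit aperture base) */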
2710         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2711
2712         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2713                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2714                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2715                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2716                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2717                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2718
2719         mutex_lock(&adev->srbm_mutex);
2720         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2721                 vi_srbm_select(adev, 0, 0, 0, i);
2722                 /* CP and shaders */
2723                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2724                 WREG32(mmSH_MEM_APE1_BASE, 1);
2725                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2726                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2727         }
2728         vi_srbm_select(adev, 0, 0, 0, 0);
2729         mutex_unlock(&adev->srbm_mutex);
2730 }
2731
2732 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2733 {
2734         u32 tmp;
2735         int i;
2736
2737         tmp = RREG32(mmGRBM_CNTL);
2738         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2739         WREG32(mmGRBM_CNTL, tmp);
2740
2741         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2742         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2743         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2744         WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2745                adev->gfx.config.gb_addr_config & 0x70);
2746         WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2747                adev->gfx.config.gb_addr_config & 0x70);
2748         WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2749         WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2750         WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2751
2752         gfx_v8_0_tiling_mode_table_init(adev);
2753
2754         gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2755                                  adev->gfx.config.max_sh_per_se,
2756                                  adev->gfx.config.max_backends_per_se);
2757
2758         /* XXX SH_MEM regs */
2759         /* where to put LDS, scratch, GPUVM in FSA64 space */
2760         mutex_lock(&adev->srbm_mutex);
2761         for (i = 0; i < 16; i++) {
2762                 vi_srbm_select(adev, 0, 0, 0, i);
2763                 /* CP and shaders */
2764                 if (i == 0) {
2765                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2766                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2767                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2768                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2769                         WREG32(mmSH_MEM_CONFIG, tmp);
2770                 } else {
2771                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2772                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2773                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2774                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2775                         WREG32(mmSH_MEM_CONFIG, tmp);
2776                 }
2777
2778                 WREG32(mmSH_MEM_APE1_BASE, 1);
2779                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2780                 WREG32(mmSH_MEM_BASES, 0);
2781         }
2782         vi_srbm_select(adev, 0, 0, 0, 0);
2783         mutex_unlock(&adev->srbm_mutex);
2784
2785         gfx_v8_0_init_compute_vmid(adev);
2786
2787         mutex_lock(&adev->grbm_idx_mutex);
2788         /*
2789          * make sure that the following register writes will be broadcast
2790          * to all the shaders
2791          */
2792         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2793
2794         WREG32(mmPA_SC_FIFO_SIZE,
2795                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2796                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2797                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2798                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2799                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2800                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2801                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2802                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2803         mutex_unlock(&adev->grbm_idx_mutex);
2804
2805 }
2806
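/* poll each SE/SH until the RLC serdes CU masters go idle, then wait for the
 * non-CU masters as well */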
2807 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2808 {
2809         u32 i, j, k;
2810         u32 mask;
2811
2812         mutex_lock(&adev->grbm_idx_mutex);
2813         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2814                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2815                         gfx_v8_0_select_se_sh(adev, i, j);
2816                         for (k = 0; k < adev->usec_timeout; k++) {
2817                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2818                                         break;
2819                                 udelay(1);
2820                         }
2821                 }
2822         }
2823         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2824         mutex_unlock(&adev->grbm_idx_mutex);
2825
2826         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2827                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2828                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2829                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2830         for (k = 0; k < adev->usec_timeout; k++) {
2831                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2832                         break;
2833                 udelay(1);
2834         }
2835 }
2836
2837 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2838                                                bool enable)
2839 {
2840         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2841
2842         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2846
2847         WREG32(mmCP_INT_CNTL_RING0, tmp);
2848 }
2849
2850 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2851 {
2852         u32 tmp = RREG32(mmRLC_CNTL);
2853
2854         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2855         WREG32(mmRLC_CNTL, tmp);
2856
2857         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2858
2859         gfx_v8_0_wait_for_rlc_serdes(adev);
2860 }
2861
2862 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2863 {
2864         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2865
2866         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2867         WREG32(mmGRBM_SOFT_RESET, tmp);
2868         udelay(50);
2869         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2870         WREG32(mmGRBM_SOFT_RESET, tmp);
2871         udelay(50);
2872 }
2873
2874 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2875 {
2876         u32 tmp = RREG32(mmRLC_CNTL);
2877
2878         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2879         WREG32(mmRLC_CNTL, tmp);
2880
2881         /* on Carrizo (APU), the CP interrupt is enabled only after the CP is initialized */
2882         if (!(adev->flags & AMD_IS_APU))
2883                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2884
2885         udelay(50);
2886 }
2887
2888 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2889 {
2890         const struct rlc_firmware_header_v2_0 *hdr;
2891         const __le32 *fw_data;
2892         unsigned i, fw_size;
2893
2894         if (!adev->gfx.rlc_fw)
2895                 return -EINVAL;
2896
2897         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2898         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2899
2900         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2901                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2902         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2903
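        /* stream the ucode through the GPM address/data register pair and
         * leave the address register holding the firmware version */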
2904         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2905         for (i = 0; i < fw_size; i++)
2906                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2907         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2908
2909         return 0;
2910 }
2911
2912 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2913 {
2914         int r;
2915
2916         gfx_v8_0_rlc_stop(adev);
2917
2918         /* disable CG */
2919         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2920
2921         /* disable PG */
2922         WREG32(mmRLC_PG_CNTL, 0);
2923
2924         gfx_v8_0_rlc_reset(adev);
2925
2926         if (!adev->pp_enabled) {
2927                 if (!adev->firmware.smu_load) {
2928                         /* legacy rlc firmware loading */
2929                         r = gfx_v8_0_rlc_load_microcode(adev);
2930                         if (r)
2931                                 return r;
2932                 } else {
2933                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2934                                                         AMDGPU_UCODE_ID_RLC_G);
2935                         if (r)
2936                                 return -EINVAL;
2937                 }
2938         }
2939
2940         gfx_v8_0_rlc_start(adev);
2941
2942         return 0;
2943 }
2944
2945 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2946 {
2947         int i;
2948         u32 tmp = RREG32(mmCP_ME_CNTL);
2949
2950         if (enable) {
2951                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2954         } else {
2955                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959                         adev->gfx.gfx_ring[i].ready = false;
2960         }
2961         WREG32(mmCP_ME_CNTL, tmp);
2962         udelay(50);
2963 }
2964
2965 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2966 {
2967         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2968         const struct gfx_firmware_header_v1_0 *ce_hdr;
2969         const struct gfx_firmware_header_v1_0 *me_hdr;
2970         const __le32 *fw_data;
2971         unsigned i, fw_size;
2972
2973         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2974                 return -EINVAL;
2975
2976         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2977                 adev->gfx.pfp_fw->data;
2978         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2979                 adev->gfx.ce_fw->data;
2980         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2981                 adev->gfx.me_fw->data;
2982
2983         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2984         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2985         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2986
2987         gfx_v8_0_cp_gfx_enable(adev, false);
2988
2989         /* PFP */
2990         fw_data = (const __le32 *)
2991                 (adev->gfx.pfp_fw->data +
2992                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2993         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2994         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2995         for (i = 0; i < fw_size; i++)
2996                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2997         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2998
2999         /* CE */
3000         fw_data = (const __le32 *)
3001                 (adev->gfx.ce_fw->data +
3002                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3003         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3004         WREG32(mmCP_CE_UCODE_ADDR, 0);
3005         for (i = 0; i < fw_size; i++)
3006                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3007         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3008
3009         /* ME */
3010         fw_data = (const __le32 *)
3011                 (adev->gfx.me_fw->data +
3012                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3013         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3014         WREG32(mmCP_ME_RAM_WADDR, 0);
3015         for (i = 0; i < fw_size; i++)
3016                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3017         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3018
3019         return 0;
3020 }
3021
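/* number of dwords in the clear-state sequence emitted by
 * gfx_v8_0_cp_gfx_start(); keep in sync with the packets written there */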
3022 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3023 {
3024         u32 count = 0;
3025         const struct cs_section_def *sect = NULL;
3026         const struct cs_extent_def *ext = NULL;
3027
3028         /* begin clear state */
3029         count += 2;
3030         /* context control state */
3031         count += 3;
3032
3033         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3035                         if (sect->id == SECT_CONTEXT)
3036                                 count += 2 + ext->reg_count;
3037                         else
3038                                 return 0;
3039                 }
3040         }
3041         /* pa_sc_raster_config/pa_sc_raster_config1 */
3042         count += 4;
3043         /* end clear state */
3044         count += 2;
3045         /* clear state */
3046         count += 2;
3047
3048         return count;
3049 }
3050
3051 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3052 {
3053         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3054         const struct cs_section_def *sect = NULL;
3055         const struct cs_extent_def *ext = NULL;
3056         int r, i;
3057
3058         /* init the CP */
3059         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3060         WREG32(mmCP_ENDIAN_SWAP, 0);
3061         WREG32(mmCP_DEVICE_ID, 1);
3062
3063         gfx_v8_0_cp_gfx_enable(adev, true);
3064
3065         r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3066         if (r) {
3067                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068                 return r;
3069         }
3070
3071         /* clear state buffer */
3072         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3074
3075         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3076         amdgpu_ring_write(ring, 0x80000000);
3077         amdgpu_ring_write(ring, 0x80000000);
3078
3079         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3080                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3081                         if (sect->id == SECT_CONTEXT) {
3082                                 amdgpu_ring_write(ring,
3083                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3084                                                ext->reg_count));
3085                                 amdgpu_ring_write(ring,
3086                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3087                                 for (i = 0; i < ext->reg_count; i++)
3088                                         amdgpu_ring_write(ring, ext->extent[i]);
3089                         }
3090                 }
3091         }
3092
3093         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3094         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3095         switch (adev->asic_type) {
3096         case CHIP_TONGA:
3097                 amdgpu_ring_write(ring, 0x16000012);
3098                 amdgpu_ring_write(ring, 0x0000002A);
3099                 break;
3100         case CHIP_FIJI:
3101                 amdgpu_ring_write(ring, 0x3a00161a);
3102                 amdgpu_ring_write(ring, 0x0000002e);
3103                 break;
3104         case CHIP_TOPAZ:
3105         case CHIP_CARRIZO:
3106                 amdgpu_ring_write(ring, 0x00000002);
3107                 amdgpu_ring_write(ring, 0x00000000);
3108                 break;
3109         case CHIP_STONEY:
3110                 amdgpu_ring_write(ring, 0x00000000);
3111                 amdgpu_ring_write(ring, 0x00000000);
3112                 break;
3113         default:
3114                 BUG();
3115         }
3116
3117         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3118         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3119
3120         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3121         amdgpu_ring_write(ring, 0);
3122
3123         /* init the CE partitions */
3124         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3125         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3126         amdgpu_ring_write(ring, 0x8000);
3127         amdgpu_ring_write(ring, 0x8000);
3128
3129         amdgpu_ring_unlock_commit(ring);
3130
3131         return 0;
3132 }
3133
3134 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3135 {
3136         struct amdgpu_ring *ring;
3137         u32 tmp;
3138         u32 rb_bufsz;
3139         u64 rb_addr, rptr_addr;
3140         int r;
3141
3142         /* Set the write pointer delay */
3143         WREG32(mmCP_RB_WPTR_DELAY, 0);
3144
3145         /* set the RB to use vmid 0 */
3146         WREG32(mmCP_RB_VMID, 0);
3147
3148         /* Set ring buffer size */
3149         ring = &adev->gfx.gfx_ring[0];
3150         rb_bufsz = order_base_2(ring->ring_size / 8);
3151         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3152         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3153         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3154         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3155 #ifdef __BIG_ENDIAN
3156         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3157 #endif
3158         WREG32(mmCP_RB0_CNTL, tmp);
3159
3160         /* Initialize the ring buffer's read and write pointers */
3161         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3162         ring->wptr = 0;
3163         WREG32(mmCP_RB0_WPTR, ring->wptr);
3164
3165         /* set the wb address whether it's enabled or not */
3166         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3167         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3168         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3169
3170         mdelay(1);
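        /* drop RB_RPTR_WR_ENA again and latch the final ring-buffer control value */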
3171         WREG32(mmCP_RB0_CNTL, tmp);
3172
3173         rb_addr = ring->gpu_addr >> 8;
3174         WREG32(mmCP_RB0_BASE, rb_addr);
3175         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3176
3177         /* no gfx doorbells on iceland */
3178         if (adev->asic_type != CHIP_TOPAZ) {
3179                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3180                 if (ring->use_doorbell) {
3181                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3182                                             DOORBELL_OFFSET, ring->doorbell_index);
3183                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3184                                             DOORBELL_EN, 1);
3185                 } else {
3186                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3187                                             DOORBELL_EN, 0);
3188                 }
3189                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3190
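                /*
                 * On Tonga, also program the CP doorbell aperture: the lower
                 * bound points at the gfx ring 0 doorbell, the upper bound is
                 * left at the register maximum.
                 */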
3191                 if (adev->asic_type == CHIP_TONGA) {
3192                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3193                                             DOORBELL_RANGE_LOWER,
3194                                             AMDGPU_DOORBELL_GFX_RING0);
3195                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3196
3197                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3198                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3199                 }
3200
3201         }
3202
3203         /* start the ring */
3204         gfx_v8_0_cp_gfx_start(adev);
3205         ring->ready = true;
3206         r = amdgpu_ring_test_ring(ring);
3207         if (r) {
3208                 ring->ready = false;
3209                 return r;
3210         }
3211
3212         return 0;
3213 }
3214
3215 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3216 {
3217         int i;
3218
3219         if (enable) {
3220                 WREG32(mmCP_MEC_CNTL, 0);
3221         } else {
3222                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224                         adev->gfx.compute_ring[i].ready = false;
3225         }
3226         udelay(50);
3227 }
3228
3229 static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3230 {
3231         gfx_v8_0_cp_compute_enable(adev, true);
3232
3233         return 0;
3234 }
3235
3236 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3237 {
3238         const struct gfx_firmware_header_v1_0 *mec_hdr;
3239         const __le32 *fw_data;
3240         unsigned i, fw_size;
3241
3242         if (!adev->gfx.mec_fw)
3243                 return -EINVAL;
3244
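        /* halt both MECs (ME1/ME2) before rewriting their microcode */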
3245         gfx_v8_0_cp_compute_enable(adev, false);
3246
3247         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3248         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3249
3250         fw_data = (const __le32 *)
3251                 (adev->gfx.mec_fw->data +
3252                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3253         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3254
3255         /* MEC1 */
3256         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3257         for (i = 0; i < fw_size; i++)
3258                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3259         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3260
3261         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3262         if (adev->gfx.mec2_fw) {
3263                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3264
3265                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3266                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3267
3268                 fw_data = (const __le32 *)
3269                         (adev->gfx.mec2_fw->data +
3270                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3271                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3272
3273                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3274                 for (i = 0; i < fw_size; i++)
3275                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3276                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3277         }
3278
3279         return 0;
3280 }
3281
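/*
 * Memory Queue Descriptor for VI compute queues.  The driver fills in the
 * cp_mqd_* / cp_hqd_* ordinals below, and the same values are written out to
 * the corresponding CP_MQD_* / CP_HQD_* registers in
 * gfx_v8_0_cp_compute_resume().
 */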
3282 struct vi_mqd {
3283         uint32_t header;  /* ordinal0 */
3284         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3285         uint32_t compute_dim_x;  /* ordinal2 */
3286         uint32_t compute_dim_y;  /* ordinal3 */
3287         uint32_t compute_dim_z;  /* ordinal4 */
3288         uint32_t compute_start_x;  /* ordinal5 */
3289         uint32_t compute_start_y;  /* ordinal6 */
3290         uint32_t compute_start_z;  /* ordinal7 */
3291         uint32_t compute_num_thread_x;  /* ordinal8 */
3292         uint32_t compute_num_thread_y;  /* ordinal9 */
3293         uint32_t compute_num_thread_z;  /* ordinal10 */
3294         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3295         uint32_t compute_perfcount_enable;  /* ordinal12 */
3296         uint32_t compute_pgm_lo;  /* ordinal13 */
3297         uint32_t compute_pgm_hi;  /* ordinal14 */
3298         uint32_t compute_tba_lo;  /* ordinal15 */
3299         uint32_t compute_tba_hi;  /* ordinal16 */
3300         uint32_t compute_tma_lo;  /* ordinal17 */
3301         uint32_t compute_tma_hi;  /* ordinal18 */
3302         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3303         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3304         uint32_t compute_vmid;  /* ordinal21 */
3305         uint32_t compute_resource_limits;  /* ordinal22 */
3306         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3307         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3308         uint32_t compute_tmpring_size;  /* ordinal25 */
3309         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3310         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3311         uint32_t compute_restart_x;  /* ordinal28 */
3312         uint32_t compute_restart_y;  /* ordinal29 */
3313         uint32_t compute_restart_z;  /* ordinal30 */
3314         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3315         uint32_t compute_misc_reserved;  /* ordinal32 */
3316         uint32_t compute_dispatch_id;  /* ordinal33 */
3317         uint32_t compute_threadgroup_id;  /* ordinal34 */
3318         uint32_t compute_relaunch;  /* ordinal35 */
3319         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3320         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3321         uint32_t compute_wave_restore_control;  /* ordinal38 */
3322         uint32_t reserved9;  /* ordinal39 */
3323         uint32_t reserved10;  /* ordinal40 */
3324         uint32_t reserved11;  /* ordinal41 */
3325         uint32_t reserved12;  /* ordinal42 */
3326         uint32_t reserved13;  /* ordinal43 */
3327         uint32_t reserved14;  /* ordinal44 */
3328         uint32_t reserved15;  /* ordinal45 */
3329         uint32_t reserved16;  /* ordinal46 */
3330         uint32_t reserved17;  /* ordinal47 */
3331         uint32_t reserved18;  /* ordinal48 */
3332         uint32_t reserved19;  /* ordinal49 */
3333         uint32_t reserved20;  /* ordinal50 */
3334         uint32_t reserved21;  /* ordinal51 */
3335         uint32_t reserved22;  /* ordinal52 */
3336         uint32_t reserved23;  /* ordinal53 */
3337         uint32_t reserved24;  /* ordinal54 */
3338         uint32_t reserved25;  /* ordinal55 */
3339         uint32_t reserved26;  /* ordinal56 */
3340         uint32_t reserved27;  /* ordinal57 */
3341         uint32_t reserved28;  /* ordinal58 */
3342         uint32_t reserved29;  /* ordinal59 */
3343         uint32_t reserved30;  /* ordinal60 */
3344         uint32_t reserved31;  /* ordinal61 */
3345         uint32_t reserved32;  /* ordinal62 */
3346         uint32_t reserved33;  /* ordinal63 */
3347         uint32_t reserved34;  /* ordinal64 */
3348         uint32_t compute_user_data_0;  /* ordinal65 */
3349         uint32_t compute_user_data_1;  /* ordinal66 */
3350         uint32_t compute_user_data_2;  /* ordinal67 */
3351         uint32_t compute_user_data_3;  /* ordinal68 */
3352         uint32_t compute_user_data_4;  /* ordinal69 */
3353         uint32_t compute_user_data_5;  /* ordinal70 */
3354         uint32_t compute_user_data_6;  /* ordinal71 */
3355         uint32_t compute_user_data_7;  /* ordinal72 */
3356         uint32_t compute_user_data_8;  /* ordinal73 */
3357         uint32_t compute_user_data_9;  /* ordinal74 */
3358         uint32_t compute_user_data_10;  /* ordinal75 */
3359         uint32_t compute_user_data_11;  /* ordinal76 */
3360         uint32_t compute_user_data_12;  /* ordinal77 */
3361         uint32_t compute_user_data_13;  /* ordinal78 */
3362         uint32_t compute_user_data_14;  /* ordinal79 */
3363         uint32_t compute_user_data_15;  /* ordinal80 */
3364         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3365         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3366         uint32_t reserved35;  /* ordinal83 */
3367         uint32_t reserved36;  /* ordinal84 */
3368         uint32_t reserved37;  /* ordinal85 */
3369         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3370         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3371         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3372         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3373         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3374         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3375         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3376         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3377         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3378         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3379         uint32_t reserved38;  /* ordinal96 */
3380         uint32_t reserved39;  /* ordinal97 */
3381         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3382         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3383         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3384         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3385         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3386         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3387         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3388         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3389         uint32_t reserved40;  /* ordinal106 */
3390         uint32_t reserved41;  /* ordinal107 */
3391         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3392         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3393         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3394         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3395         uint32_t reserved42;  /* ordinal112 */
3396         uint32_t reserved43;  /* ordinal113 */
3397         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3398         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3399         uint32_t cp_packet_id_lo;  /* ordinal116 */
3400         uint32_t cp_packet_id_hi;  /* ordinal117 */
3401         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3402         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3403         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3404         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3405         uint32_t gds_save_mask_lo;  /* ordinal122 */
3406         uint32_t gds_save_mask_hi;  /* ordinal123 */
3407         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3408         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3409         uint32_t reserved44;  /* ordinal126 */
3410         uint32_t reserved45;  /* ordinal127 */
3411         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3412         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3413         uint32_t cp_hqd_active;  /* ordinal130 */
3414         uint32_t cp_hqd_vmid;  /* ordinal131 */
3415         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3416         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3417         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3418         uint32_t cp_hqd_quantum;  /* ordinal135 */
3419         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3420         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3421         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3422         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3423         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3424         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3425         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3426         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3427         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3428         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3429         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3430         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3431         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3432         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3433         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3434         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3435         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3436         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3437         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3438         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3439         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3440         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3441         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3442         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3443         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3444         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3445         uint32_t cp_mqd_control;  /* ordinal162 */
3446         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3447         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3448         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3449         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3450         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3451         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3452         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3453         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3454         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3455         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3456         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3457         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3458         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3459         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3460         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3461         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3462         uint32_t cp_hqd_error;  /* ordinal179 */
3463         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3464         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3465         uint32_t reserved46;  /* ordinal182 */
3466         uint32_t reserved47;  /* ordinal183 */
3467         uint32_t reserved48;  /* ordinal184 */
3468         uint32_t reserved49;  /* ordinal185 */
3469         uint32_t reserved50;  /* ordinal186 */
3470         uint32_t reserved51;  /* ordinal187 */
3471         uint32_t reserved52;  /* ordinal188 */
3472         uint32_t reserved53;  /* ordinal189 */
3473         uint32_t reserved54;  /* ordinal190 */
3474         uint32_t reserved55;  /* ordinal191 */
3475         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3476         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3477         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3478         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3479         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3480         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3481         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3482         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3483         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3484         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3485         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3486         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3487         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3488         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3489         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3490         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3491         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3492         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3493         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3494         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3495         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3496         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3497         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3498         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3499         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3500         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3501         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3502         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3503         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3504         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3505         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3506         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3507         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3508         uint32_t reserved56;  /* ordinal225 */
3509         uint32_t reserved57;  /* ordinal226 */
3510         uint32_t reserved58;  /* ordinal227 */
3511         uint32_t set_resources_header;  /* ordinal228 */
3512         uint32_t set_resources_dw1;  /* ordinal229 */
3513         uint32_t set_resources_dw2;  /* ordinal230 */
3514         uint32_t set_resources_dw3;  /* ordinal231 */
3515         uint32_t set_resources_dw4;  /* ordinal232 */
3516         uint32_t set_resources_dw5;  /* ordinal233 */
3517         uint32_t set_resources_dw6;  /* ordinal234 */
3518         uint32_t set_resources_dw7;  /* ordinal235 */
3519         uint32_t reserved59;  /* ordinal236 */
3520         uint32_t reserved60;  /* ordinal237 */
3521         uint32_t reserved61;  /* ordinal238 */
3522         uint32_t reserved62;  /* ordinal239 */
3523         uint32_t reserved63;  /* ordinal240 */
3524         uint32_t reserved64;  /* ordinal241 */
3525         uint32_t reserved65;  /* ordinal242 */
3526         uint32_t reserved66;  /* ordinal243 */
3527         uint32_t reserved67;  /* ordinal244 */
3528         uint32_t reserved68;  /* ordinal245 */
3529         uint32_t reserved69;  /* ordinal246 */
3530         uint32_t reserved70;  /* ordinal247 */
3531         uint32_t reserved71;  /* ordinal248 */
3532         uint32_t reserved72;  /* ordinal249 */
3533         uint32_t reserved73;  /* ordinal250 */
3534         uint32_t reserved74;  /* ordinal251 */
3535         uint32_t reserved75;  /* ordinal252 */
3536         uint32_t reserved76;  /* ordinal253 */
3537         uint32_t reserved77;  /* ordinal254 */
3538         uint32_t reserved78;  /* ordinal255 */
3539
3540         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3541 };
3542
3543 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3544 {
3545         int i, r;
3546
3547         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3548                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3549
3550                 if (ring->mqd_obj) {
3551                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552                         if (unlikely(r != 0))
3553                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3554
3555                         amdgpu_bo_unpin(ring->mqd_obj);
3556                         amdgpu_bo_unreserve(ring->mqd_obj);
3557
3558                         amdgpu_bo_unref(&ring->mqd_obj);
3559                         ring->mqd_obj = NULL;
3560                 }
3561         }
3562 }
3563
3564 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3565 {
3566         int r, i, j;
3567         u32 tmp;
3568         bool use_doorbell = true;
3569         u64 hqd_gpu_addr;
3570         u64 mqd_gpu_addr;
3571         u64 eop_gpu_addr;
3572         u64 wb_gpu_addr;
3573         u32 *buf;
3574         struct vi_mqd *mqd;
3575
3576         /* init the pipes */
3577         mutex_lock(&adev->srbm_mutex);
3578         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
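                /* map the linear pipe index onto (me, pipe): the first four
                 * pipes go to ME 1, the remainder to ME 2 */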
3579                 int me = (i < 4) ? 1 : 2;
3580                 int pipe = (i < 4) ? i : (i - 4);
3581
3582                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3583                 eop_gpu_addr >>= 8;
3584
3585                 vi_srbm_select(adev, me, pipe, 0, 0);
3586
3587                 /* write the EOP addr */
3588                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3589                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3590
3591                 /* set the VMID assigned */
3592                 WREG32(mmCP_HQD_VMID, 0);
3593
3594                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3595                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3596                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3597                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3598                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3599         }
3600         vi_srbm_select(adev, 0, 0, 0, 0);
3601         mutex_unlock(&adev->srbm_mutex);
3602
3603         /* init the compute queues */
3604         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3605                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3606
3607                 if (ring->mqd_obj == NULL) {
3608                         r = amdgpu_bo_create(adev,
3609                                              sizeof(struct vi_mqd),
3610                                              PAGE_SIZE, true,
3611                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3612                                              NULL, &ring->mqd_obj);
3613                         if (r) {
3614                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3615                                 return r;
3616                         }
3617                 }
3618
3619                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3620                 if (unlikely(r != 0)) {
3621                         gfx_v8_0_cp_compute_fini(adev);
3622                         return r;
3623                 }
3624                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3625                                   &mqd_gpu_addr);
3626                 if (r) {
3627                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3628                         gfx_v8_0_cp_compute_fini(adev);
3629                         return r;
3630                 }
3631                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3632                 if (r) {
3633                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3634                         gfx_v8_0_cp_compute_fini(adev);
3635                         return r;
3636                 }
3637
3638                 /* init the mqd struct */
3639                 memset(buf, 0, sizeof(struct vi_mqd));
3640
3641                 mqd = (struct vi_mqd *)buf;
3642                 mqd->header = 0xC0310800;
3643                 mqd->compute_pipelinestat_enable = 0x00000001;
3644                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3645                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3646                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3647                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3648                 mqd->compute_misc_reserved = 0x00000003;
3649
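                /* point the SRBM at this ring's me/pipe/queue so the CP_HQD_*
                 * writes below target the right hardware queue */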
3650                 mutex_lock(&adev->srbm_mutex);
3651                 vi_srbm_select(adev, ring->me,
3652                                ring->pipe,
3653                                ring->queue, 0);
3654
3655                 /* disable wptr polling */
3656                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3657                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3658                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3659
3660                 mqd->cp_hqd_eop_base_addr_lo =
3661                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3662                 mqd->cp_hqd_eop_base_addr_hi =
3663                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3664
3665                 /* enable doorbell? */
3666                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3667                 if (use_doorbell) {
3668                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3669                 } else {
3670                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3671                 }
3672                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3673                 mqd->cp_hqd_pq_doorbell_control = tmp;
3674
3675                 /* disable the queue if it's active */
3676                 mqd->cp_hqd_dequeue_request = 0;
3677                 mqd->cp_hqd_pq_rptr = 0;
3678                 mqd->cp_hqd_pq_wptr = 0;
3679                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3680                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3681                         for (j = 0; j < adev->usec_timeout; j++) {
3682                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3683                                         break;
3684                                 udelay(1);
3685                         }
3686                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3687                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3688                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3689                 }
3690
3691                 /* set the pointer to the MQD */
3692                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3693                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3694                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3695                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3696
3697                 /* set MQD vmid to 0 */
3698                 tmp = RREG32(mmCP_MQD_CONTROL);
3699                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3700                 WREG32(mmCP_MQD_CONTROL, tmp);
3701                 mqd->cp_mqd_control = tmp;
3702
3703                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3704                 hqd_gpu_addr = ring->gpu_addr >> 8;
3705                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3706                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3707                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3708                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3709
3710                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3711                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3712                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3713                                     (order_base_2(ring->ring_size / 4) - 1));
3714                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3715                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3716 #ifdef __BIG_ENDIAN
3717                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3718 #endif
3719                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3720                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3721                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3722                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3723                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3724                 mqd->cp_hqd_pq_control = tmp;
3725
3726                 /* set the wb address whether it's enabled or not */
3727                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3728                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3729                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3730                         upper_32_bits(wb_gpu_addr) & 0xffff;
3731                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3732                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3733                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3734                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3735
3736                 /* only used if CP_PQ_WPTR_POLL_CNTL.EN=1 */
3737                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3738                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3739                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3740                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3741                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3742                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3743
3744                 /* enable the doorbell if requested */
3745                 if (use_doorbell) {
3746                         if ((adev->asic_type == CHIP_CARRIZO) ||
3747                             (adev->asic_type == CHIP_FIJI) ||
3748                             (adev->asic_type == CHIP_STONEY)) {
3749                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3750                                        AMDGPU_DOORBELL_KIQ << 2);
3751                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3752                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3753                         }
3754                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3755                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3756                                             DOORBELL_OFFSET, ring->doorbell_index);
3757                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3758                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3759                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3760                         mqd->cp_hqd_pq_doorbell_control = tmp;
3761
3762                 } else {
3763                         mqd->cp_hqd_pq_doorbell_control = 0;
3764                 }
3765                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3766                        mqd->cp_hqd_pq_doorbell_control);
3767
3768                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3769                 ring->wptr = 0;
3770                 mqd->cp_hqd_pq_wptr = ring->wptr;
3771                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3772                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3773
3774                 /* set the vmid for the queue */
3775                 mqd->cp_hqd_vmid = 0;
3776                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3777
3778                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3779                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3780                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3781                 mqd->cp_hqd_persistent_state = tmp;
3782                 if (adev->asic_type == CHIP_STONEY) {
3783                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3784                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3785                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3786                 }
3787
3788                 /* activate the queue */
3789                 mqd->cp_hqd_active = 1;
3790                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3791
3792                 vi_srbm_select(adev, 0, 0, 0, 0);
3793                 mutex_unlock(&adev->srbm_mutex);
3794
3795                 amdgpu_bo_kunmap(ring->mqd_obj);
3796                 amdgpu_bo_unreserve(ring->mqd_obj);
3797         }
3798
3799         if (use_doorbell) {
3800                 tmp = RREG32(mmCP_PQ_STATUS);
3801                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3802                 WREG32(mmCP_PQ_STATUS, tmp);
3803         }
3804
3805         r = gfx_v8_0_cp_compute_start(adev);
3806         if (r)
3807                 return r;
3808
3809         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3810                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3811
3812                 ring->ready = true;
3813                 r = amdgpu_ring_test_ring(ring);
3814                 if (r)
3815                         ring->ready = false;
3816         }
3817
3818         return 0;
3819 }
3820
3821 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3822 {
3823         int r;
3824
3825         if (!(adev->flags & AMD_IS_APU))
3826                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3827
3828         if (!adev->pp_enabled) {
3829                 if (!adev->firmware.smu_load) {
3830                         /* legacy firmware loading */
3831                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3832                         if (r)
3833                                 return r;
3834
3835                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3836                         if (r)
3837                                 return r;
3838                 } else {
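                        /* the SMU loads the CP firmware in this path; just
                         * verify that each block (CE, PFP, ME, MEC1) finished
                         * loading */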
3839                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3840                                                         AMDGPU_UCODE_ID_CP_CE);
3841                         if (r)
3842                                 return -EINVAL;
3843
3844                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3845                                                         AMDGPU_UCODE_ID_CP_PFP);
3846                         if (r)
3847                                 return -EINVAL;
3848
3849                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3850                                                         AMDGPU_UCODE_ID_CP_ME);
3851                         if (r)
3852                                 return -EINVAL;
3853
3854                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3855                                                         AMDGPU_UCODE_ID_CP_MEC1);
3856                         if (r)
3857                                 return -EINVAL;
3858                 }
3859         }
3860
3861         r = gfx_v8_0_cp_gfx_resume(adev);
3862         if (r)
3863                 return r;
3864
3865         r = gfx_v8_0_cp_compute_resume(adev);
3866         if (r)
3867                 return r;
3868
3869         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3870
3871         return 0;
3872 }
3873
3874 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3875 {
3876         gfx_v8_0_cp_gfx_enable(adev, enable);
3877         gfx_v8_0_cp_compute_enable(adev, enable);
3878 }
3879
3880 static int gfx_v8_0_hw_init(void *handle)
3881 {
3882         int r;
3883         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3884
3885         gfx_v8_0_init_golden_registers(adev);
3886
3887         gfx_v8_0_gpu_init(adev);
3888
3889         r = gfx_v8_0_rlc_resume(adev);
3890         if (r)
3891                 return r;
3892
3893         r = gfx_v8_0_cp_resume(adev);
3894         if (r)
3895                 return r;
3896
3897         return r;
3898 }
3899
3900 static int gfx_v8_0_hw_fini(void *handle)
3901 {
3902         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3903
3904         gfx_v8_0_cp_enable(adev, false);
3905         gfx_v8_0_rlc_stop(adev);
3906         gfx_v8_0_cp_compute_fini(adev);
3907
3908         return 0;
3909 }
3910
3911 static int gfx_v8_0_suspend(void *handle)
3912 {
3913         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3914
3915         return gfx_v8_0_hw_fini(adev);
3916 }
3917
3918 static int gfx_v8_0_resume(void *handle)
3919 {
3920         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3921
3922         return gfx_v8_0_hw_init(adev);
3923 }
3924
3925 static bool gfx_v8_0_is_idle(void *handle)
3926 {
3927         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3928
3929         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3930                 return false;
3931         else
3932                 return true;
3933 }
3934
3935 static int gfx_v8_0_wait_for_idle(void *handle)
3936 {
3937         unsigned i;
3938         u32 tmp;
3939         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3940
3941         for (i = 0; i < adev->usec_timeout; i++) {
3942                 /* read GRBM_STATUS */
3943                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3944
3945                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3946                         return 0;
3947                 udelay(1);
3948         }
3949         return -ETIMEDOUT;
3950 }
3951
3952 static void gfx_v8_0_print_status(void *handle)
3953 {
3954         int i;
3955         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3956
3957         dev_info(adev->dev, "GFX 8.x registers\n");
3958         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3959                  RREG32(mmGRBM_STATUS));
3960         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3961                  RREG32(mmGRBM_STATUS2));
3962         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3963                  RREG32(mmGRBM_STATUS_SE0));
3964         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3965                  RREG32(mmGRBM_STATUS_SE1));
3966         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3967                  RREG32(mmGRBM_STATUS_SE2));
3968         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3969                  RREG32(mmGRBM_STATUS_SE3));
3970         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3971         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3972                  RREG32(mmCP_STALLED_STAT1));
3973         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3974                  RREG32(mmCP_STALLED_STAT2));
3975         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3976                  RREG32(mmCP_STALLED_STAT3));
3977         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3978                  RREG32(mmCP_CPF_BUSY_STAT));
3979         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3980                  RREG32(mmCP_CPF_STALLED_STAT1));
3981         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3982         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3983         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3984                  RREG32(mmCP_CPC_STALLED_STAT1));
3985         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3986
3987         for (i = 0; i < 32; i++) {
3988                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3989                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3990         }
3991         for (i = 0; i < 16; i++) {
3992                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3993                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3994         }
3995         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3996                 dev_info(adev->dev, "  se: %d\n", i);
3997                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3998                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3999                          RREG32(mmPA_SC_RASTER_CONFIG));
4000                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4001                          RREG32(mmPA_SC_RASTER_CONFIG_1));
4002         }
4003         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4004
4005         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4006                  RREG32(mmGB_ADDR_CONFIG));
4007         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4008                  RREG32(mmHDP_ADDR_CONFIG));
4009         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4010                  RREG32(mmDMIF_ADDR_CALC));
4011         dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
4012                  RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
4013         dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
4014                  RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
4015         dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
4016                  RREG32(mmUVD_UDEC_ADDR_CONFIG));
4017         dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
4018                  RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
4019         dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
4020                  RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4021
4022         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4023                  RREG32(mmCP_MEQ_THRESHOLDS));
4024         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4025                  RREG32(mmSX_DEBUG_1));
4026         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4027                  RREG32(mmTA_CNTL_AUX));
4028         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4029                  RREG32(mmSPI_CONFIG_CNTL));
4030         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4031                  RREG32(mmSQ_CONFIG));
4032         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4033                  RREG32(mmDB_DEBUG));
4034         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4035                  RREG32(mmDB_DEBUG2));
4036         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4037                  RREG32(mmDB_DEBUG3));
4038         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4039                  RREG32(mmCB_HW_CONTROL));
4040         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4041                  RREG32(mmSPI_CONFIG_CNTL_1));
4042         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4043                  RREG32(mmPA_SC_FIFO_SIZE));
4044         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4045                  RREG32(mmVGT_NUM_INSTANCES));
4046         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4047                  RREG32(mmCP_PERFMON_CNTL));
4048         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4049                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4050         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4051                  RREG32(mmVGT_CACHE_INVALIDATION));
4052         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4053                  RREG32(mmVGT_GS_VERTEX_REUSE));
4054         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4055                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4056         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4057                  RREG32(mmPA_CL_ENHANCE));
4058         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4059                  RREG32(mmPA_SC_ENHANCE));
4060
4061         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4062                  RREG32(mmCP_ME_CNTL));
4063         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4064                  RREG32(mmCP_MAX_CONTEXT));
4065         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4066                  RREG32(mmCP_ENDIAN_SWAP));
4067         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4068                  RREG32(mmCP_DEVICE_ID));
4069
4070         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4071                  RREG32(mmCP_SEM_WAIT_TIMER));
4072
4073         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4074                  RREG32(mmCP_RB_WPTR_DELAY));
4075         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4076                  RREG32(mmCP_RB_VMID));
4077         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4078                  RREG32(mmCP_RB0_CNTL));
4079         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4080                  RREG32(mmCP_RB0_WPTR));
4081         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4082                  RREG32(mmCP_RB0_RPTR_ADDR));
4083         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4084                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4085         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4086                  RREG32(mmCP_RB0_CNTL));
4087         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4088                  RREG32(mmCP_RB0_BASE));
4089         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4090                  RREG32(mmCP_RB0_BASE_HI));
4091         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4092                  RREG32(mmCP_MEC_CNTL));
4093         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4094                  RREG32(mmCP_CPF_DEBUG));
4095
4096         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4097                  RREG32(mmSCRATCH_ADDR));
4098         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4099                  RREG32(mmSCRATCH_UMSK));
4100
4101         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4102                  RREG32(mmCP_INT_CNTL_RING0));
4103         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4104                  RREG32(mmRLC_LB_CNTL));
4105         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4106                  RREG32(mmRLC_CNTL));
4107         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4108                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4109         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4110                  RREG32(mmRLC_LB_CNTR_INIT));
4111         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4112                  RREG32(mmRLC_LB_CNTR_MAX));
4113         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4114                  RREG32(mmRLC_LB_INIT_CU_MASK));
4115         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4116                  RREG32(mmRLC_LB_PARAMS));
4117         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4118                  RREG32(mmRLC_LB_CNTL));
4119         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4120                  RREG32(mmRLC_MC_CNTL));
4121         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4122                  RREG32(mmRLC_UCODE_CNTL));
4123
4124         mutex_lock(&adev->srbm_mutex);
4125         for (i = 0; i < 16; i++) {
4126                 vi_srbm_select(adev, 0, 0, 0, i);
4127                 dev_info(adev->dev, "  VM %d:\n", i);
4128                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4129                          RREG32(mmSH_MEM_CONFIG));
4130                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4131                          RREG32(mmSH_MEM_APE1_BASE));
4132                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4133                          RREG32(mmSH_MEM_APE1_LIMIT));
4134                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4135                          RREG32(mmSH_MEM_BASES));
4136         }
4137         vi_srbm_select(adev, 0, 0, 0, 0);
4138         mutex_unlock(&adev->srbm_mutex);
4139 }
4140
4141 static int gfx_v8_0_soft_reset(void *handle)
4142 {
4143         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4144         u32 tmp;
4145         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4146
4147         /* GRBM_STATUS */
4148         tmp = RREG32(mmGRBM_STATUS);
4149         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4150                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4151                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4152                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4153                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4154                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4155                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4156                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4157                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4158                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4159         }
4160
4161         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4162                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4163                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4164                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4165                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4166         }
4167
4168         /* GRBM_STATUS2 */
4169         tmp = RREG32(mmGRBM_STATUS2);
4170         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4171                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4172                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4173
4174         /* SRBM_STATUS */
4175         tmp = RREG32(mmSRBM_STATUS);
4176         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4177                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4178                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4179
4180         if (grbm_soft_reset || srbm_soft_reset) {
4181                 gfx_v8_0_print_status((void *)adev);
4182                 /* stop the rlc */
4183                 gfx_v8_0_rlc_stop(adev);
4184
4185                 /* Disable GFX parsing/prefetching */
4186                 gfx_v8_0_cp_gfx_enable(adev, false);
4187
4188                 /* Disable MEC parsing/prefetching */
4189                 gfx_v8_0_cp_compute_enable(adev, false);
4190
4191                 if (grbm_soft_reset || srbm_soft_reset) {
4192                         tmp = RREG32(mmGMCON_DEBUG);
4193                         tmp = REG_SET_FIELD(tmp,
4194                                             GMCON_DEBUG, GFX_STALL, 1);
4195                         tmp = REG_SET_FIELD(tmp,
4196                                             GMCON_DEBUG, GFX_CLEAR, 1);
4197                         WREG32(mmGMCON_DEBUG, tmp);
4198
4199                         udelay(50);
4200                 }
4201
4202                 if (grbm_soft_reset) {
4203                         tmp = RREG32(mmGRBM_SOFT_RESET);
4204                         tmp |= grbm_soft_reset;
4205                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4206                         WREG32(mmGRBM_SOFT_RESET, tmp);
4207                         tmp = RREG32(mmGRBM_SOFT_RESET);
4208
4209                         udelay(50);
4210
4211                         tmp &= ~grbm_soft_reset;
4212                         WREG32(mmGRBM_SOFT_RESET, tmp);
4213                         tmp = RREG32(mmGRBM_SOFT_RESET);
4214                 }
4215
4216                 if (srbm_soft_reset) {
4217                         tmp = RREG32(mmSRBM_SOFT_RESET);
4218                         tmp |= srbm_soft_reset;
4219                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4220                         WREG32(mmSRBM_SOFT_RESET, tmp);
4221                         tmp = RREG32(mmSRBM_SOFT_RESET);
4222
4223                         udelay(50);
4224
4225                         tmp &= ~srbm_soft_reset;
4226                         WREG32(mmSRBM_SOFT_RESET, tmp);
4227                         tmp = RREG32(mmSRBM_SOFT_RESET);
4228                 }
4229
4230                 if (grbm_soft_reset || srbm_soft_reset) {
4231                         tmp = RREG32(mmGMCON_DEBUG);
4232                         tmp = REG_SET_FIELD(tmp,
4233                                             GMCON_DEBUG, GFX_STALL, 0);
4234                         tmp = REG_SET_FIELD(tmp,
4235                                             GMCON_DEBUG, GFX_CLEAR, 0);
4236                         WREG32(mmGMCON_DEBUG, tmp);
4237                 }
4238
4239                 /* Wait a little for things to settle down */
4240                 udelay(50);
4241                 gfx_v8_0_print_status((void *)adev);
4242         }
4243         return 0;
4244 }
4245
4246 /**
4247  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4248  *
4249  * @adev: amdgpu_device pointer
4250  *
4251  * Fetches a GPU clock counter snapshot.
4252  * Returns the 64 bit clock counter snapshot.
4253  */
4254 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4255 {
4256         uint64_t clock;
4257
4258         mutex_lock(&adev->gfx.gpu_clock_mutex);
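        /* the write below appears to latch the free-running counter so the
         * LSB/MSB reads that follow form a consistent 64-bit snapshot */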
4259         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4260         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4261                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4262         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4263         return clock;
4264 }
4265
4266 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4267                                           uint32_t vmid,
4268                                           uint32_t gds_base, uint32_t gds_size,
4269                                           uint32_t gws_base, uint32_t gws_size,
4270                                           uint32_t oa_base, uint32_t oa_size)
4271 {
4272         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4273         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4274
4275         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4276         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4277
4278         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4279         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4280
4281         /* GDS Base */
4282         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4283         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4284                                 WRITE_DATA_DST_SEL(0)));
4285         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4286         amdgpu_ring_write(ring, 0);
4287         amdgpu_ring_write(ring, gds_base);
4288
4289         /* GDS Size */
4290         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4291         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4292                                 WRITE_DATA_DST_SEL(0)));
4293         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4294         amdgpu_ring_write(ring, 0);
4295         amdgpu_ring_write(ring, gds_size);
4296
4297         /* GWS */
4298         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4299         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4300                                 WRITE_DATA_DST_SEL(0)));
4301         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4302         amdgpu_ring_write(ring, 0);
4303         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4304
4305         /* OA */
4306         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4307         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4308                                 WRITE_DATA_DST_SEL(0)));
4309         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4310         amdgpu_ring_write(ring, 0);
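        /* (1 << (oa_size + oa_base)) - (1 << oa_base) sets a contiguous run of
         * oa_size bits starting at bit oa_base */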
4311         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4312 }
4313
4314 static int gfx_v8_0_early_init(void *handle)
4315 {
4316         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4317
4318         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4319         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4320         gfx_v8_0_set_ring_funcs(adev);
4321         gfx_v8_0_set_irq_funcs(adev);
4322         gfx_v8_0_set_gds_init(adev);
4323
4324         return 0;
4325 }
4326
4327 static int gfx_v8_0_late_init(void *handle)
4328 {
4329         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4330         int r;
4331
4332         /* requires IBs so do in late init after IB pool is initialized */
4333         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4334         if (r)
4335                 return r;
4336
4337         return 0;
4338 }
4339
4340 static int gfx_v8_0_set_powergating_state(void *handle,
4341                                           enum amd_powergating_state state)
4342 {
4343         return 0;
4344 }
4345
4346 static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4347                 uint32_t reg_addr, uint32_t cmd)
4348 {
4349         uint32_t data;
4350
4351         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4352
4353         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4354         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4355
4356         data = RREG32(mmRLC_SERDES_WR_CTRL);
4357         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4358                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4359                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4360                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4361                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4362                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4363                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4364                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4365                         RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4366                         RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4367                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4368         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4369                         (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4370                         (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4371                         (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4372
4373         WREG32(mmRLC_SERDES_WR_CTRL, data);
4374 }
4375
4376 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4377                 bool enable)
4378 {
4379         uint32_t temp, data;
4380
4381         /* It is disabled by HW by default */
4382         if (enable) {
4383                 /* 1 - RLC memory Light sleep */
4384                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4385                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4386                 if (temp != data)
4387                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4388
4389                 /* 2 - CP memory Light sleep */
4390                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4391                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4392                 if (temp != data)
4393                         WREG32(mmCP_MEM_SLP_CNTL, data);
4394
4395                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4396                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4397                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4398                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4399                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4400                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4401
4402                 if (temp != data)
4403                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4404
4405                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4406                 gfx_v8_0_wait_for_rlc_serdes(adev);
4407
4408                 /* 5 - clear mgcg override */
4409                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4410
4411                 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
4412                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4413                 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4414                 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4415                 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4416                 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4417                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4418                 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4419                 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4420                 if (temp != data)
4421                         WREG32(mmCGTS_SM_CTRL_REG, data);
4422                 udelay(50);
4423
4424                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4425                 gfx_v8_0_wait_for_rlc_serdes(adev);
4426         } else {
4427                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4428                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4429                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4430                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4431                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4432                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4433                 if (temp != data)
4434                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4435
4436                 /* 2 - disable MGLS in RLC */
4437                 data = RREG32(mmRLC_MEM_SLP_CNTL);
4438                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4439                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4440                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4441                 }
4442
4443                 /* 3 - disable MGLS in CP */
4444                 data = RREG32(mmCP_MEM_SLP_CNTL);
4445                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4446                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4447                         WREG32(mmCP_MEM_SLP_CNTL, data);
4448                 }
4449
4450                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4451                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4452                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4453                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4454                 if (temp != data)
4455                         WREG32(mmCGTS_SM_CTRL_REG, data);
4456
4457                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4458                 gfx_v8_0_wait_for_rlc_serdes(adev);
4459
4460                 /* 6 - set mgcg override */
4461                 fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4462
4463                 udelay(50);
4464
4465                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4466                 gfx_v8_0_wait_for_rlc_serdes(adev);
4467         }
4468 }
4469
4470 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4471                 bool enable)
4472 {
4473         uint32_t temp, temp1, data, data1;
4474
4475         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4476
4477         if (enable) {
4478                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4479                  * Cmp_busy/GFX_Idle interrupts
4480                  */
4481                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4482
4483                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4484                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4485                 if (temp1 != data1)
4486                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4487
4488                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4489                 gfx_v8_0_wait_for_rlc_serdes(adev);
4490
4491                 /* 3 - clear cgcg override */
4492                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4493
4494                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4495                 gfx_v8_0_wait_for_rlc_serdes(adev);
4496
4497                 /* 4 - write cmd to set CGLS */
4498                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4499
4500                 /* 5 - enable cgcg */
4501                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4502
4503                 /* enable cgls */
4504                 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4505
4506                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4507                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4508
4509                 if (temp1 != data1)
4510                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4511
4512                 if (temp != data)
4513                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4514         } else {
4515                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
4516                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4517
4518                 /* TEST CGCG */
4519                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4520                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4521                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4522                 if (temp1 != data1)
4523                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4524
4525                 /* read gfx register to wake up cgcg */
4526                 RREG32(mmCB_CGTT_SCLK_CTRL);
4527                 RREG32(mmCB_CGTT_SCLK_CTRL);
4528                 RREG32(mmCB_CGTT_SCLK_CTRL);
4529                 RREG32(mmCB_CGTT_SCLK_CTRL);
4530
4531                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4532                 gfx_v8_0_wait_for_rlc_serdes(adev);
4533
4534                 /* write cmd to Set CGCG Override */
4535                 fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4536
4537                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4538                 gfx_v8_0_wait_for_rlc_serdes(adev);
4539
4540                 /* write cmd to Clear CGLS */
4541                 fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4542
4543                 /* disable cgcg, cgls should be disabled too. */
4544                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4545                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4546                 if (temp != data)
4547                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4548         }
4549 }

4550 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4551                 bool enable)
4552 {
4553         if (enable) {
4554                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4555                  * ===  MGCG + MGLS + TS(CG/LS) ===
4556                  */
4557                 fiji_update_medium_grain_clock_gating(adev, enable);
4558                 fiji_update_coarse_grain_clock_gating(adev, enable);
4559         } else {
4560                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4561                  * ===  CGCG + CGLS ===
4562                  */
4563                 fiji_update_coarse_grain_clock_gating(adev, enable);
4564                 fiji_update_medium_grain_clock_gating(adev, enable);
4565         }
4566         return 0;
4567 }
4568
4569 static int gfx_v8_0_set_clockgating_state(void *handle,
4570                                           enum amd_clockgating_state state)
4571 {
4572         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4573
4574         switch (adev->asic_type) {
4575         case CHIP_FIJI:
4576                 fiji_update_gfx_clock_gating(adev,
4577                                 state == AMD_CG_STATE_GATE);
4578                 break;
4579         default:
4580                 break;
4581         }
4582         return 0;
4583 }
4584
4585 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4586 {
4587         u32 rptr;
4588
4589         rptr = ring->adev->wb.wb[ring->rptr_offs];
4590
4591         return rptr;
4592 }
4593
4594 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4595 {
4596         struct amdgpu_device *adev = ring->adev;
4597         u32 wptr;
4598
4599         if (ring->use_doorbell)
4600                 /* XXX check if swapping is necessary on BE */
4601                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4602         else
4603                 wptr = RREG32(mmCP_RB0_WPTR);
4604
4605         return wptr;
4606 }
4607
4608 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4609 {
4610         struct amdgpu_device *adev = ring->adev;
4611
4612         if (ring->use_doorbell) {
4613                 /* XXX check if swapping is necessary on BE */
4614                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4615                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4616         } else {
4617                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4618                 (void)RREG32(mmCP_RB0_WPTR);
4619         }
4620 }
4621
4622 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4623 {
4624         u32 ref_and_mask, reg_mem_engine;
4625
4626         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
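                     /*
                      * Each compute pipe has its own done bit in GPU_HDP_FLUSH_DONE
                      * (assumed from the register layout): MEC1 pipes start at the
                      * CP2 bit and MEC2 pipes at the CP6 bit, while the gfx ring
                      * uses CP0 below.
                      */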
4627                 switch (ring->me) {
4628                 case 1:
4629                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4630                         break;
4631                 case 2:
4632                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4633                         break;
4634                 default:
4635                         return;
4636                 }
4637                 reg_mem_engine = 0;
4638         } else {
4639                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4640                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4641         }
4642
4643         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4644         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4645                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4646                                  reg_mem_engine));
4647         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4648         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4649         amdgpu_ring_write(ring, ref_and_mask);
4650         amdgpu_ring_write(ring, ref_and_mask);
4651         amdgpu_ring_write(ring, 0x20); /* poll interval */
4652 }
4653
4654 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4655                                   struct amdgpu_ib *ib)
4656 {
4657         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4658         u32 header, control = 0;
4659         u32 next_rptr = ring->wptr + 5;
4660
4661         /* drop the CE preamble IB for the same context */
4662         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4663                 return;
4664
4665         if (need_ctx_switch)
4666                 next_rptr += 2;
4667
4668         next_rptr += 4;
4669         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4670         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4671         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4672         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4673         amdgpu_ring_write(ring, next_rptr);
4674
4675         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4676         if (need_ctx_switch) {
4677                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4678                 amdgpu_ring_write(ring, 0);
4679         }
4680
4681         if (ib->flags & AMDGPU_IB_FLAG_CE)
4682                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4683         else
4684                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4685
4686         control |= ib->length_dw |
4687                 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4688
4689         amdgpu_ring_write(ring, header);
4690         amdgpu_ring_write(ring,
4691 #ifdef __BIG_ENDIAN
4692                           (2 << 0) |
4693 #endif
4694                           (ib->gpu_addr & 0xFFFFFFFC));
4695         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4696         amdgpu_ring_write(ring, control);
4697 }
4698
4699 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4700                                   struct amdgpu_ib *ib)
4701 {
4702         u32 header, control = 0;
4703         u32 next_rptr = ring->wptr + 5;
4704
4705         control |= INDIRECT_BUFFER_VALID;
4706
4707         next_rptr += 4;
4708         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4709         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4710         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4711         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4712         amdgpu_ring_write(ring, next_rptr);
4713
4714         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4715
4716         control |= ib->length_dw |
4717                            (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4718
4719         amdgpu_ring_write(ring, header);
4720         amdgpu_ring_write(ring,
4721 #ifdef __BIG_ENDIAN
4722                                           (2 << 0) |
4723 #endif
4724                                           (ib->gpu_addr & 0xFFFFFFFC));
4725         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4726         amdgpu_ring_write(ring, control);
4727 }
4728
4729 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4730                                          u64 seq, unsigned flags)
4731 {
4732         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4733         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4734
4735         /* EVENT_WRITE_EOP - flush caches, send int */
4736         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4737         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4738                                  EOP_TC_ACTION_EN |
4739                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4740                                  EVENT_INDEX(5)));
4741         amdgpu_ring_write(ring, addr & 0xfffffffc);
4742         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4743                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4744         amdgpu_ring_write(ring, lower_32_bits(seq));
4745         amdgpu_ring_write(ring, upper_32_bits(seq));
4746
4747 }
4748
4749 /**
4750  * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4751  *
4752  * @ring: amdgpu ring buffer object
4753  * @semaphore: amdgpu semaphore object
4754  * @emit_wait: Is this a semaphore wait?
4755  *
4756  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4757  * from running ahead of semaphore waits.
4758  */
4759 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4760                                          struct amdgpu_semaphore *semaphore,
4761                                          bool emit_wait)
4762 {
4763         uint64_t addr = semaphore->gpu_addr;
4764         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4765
4766         if (ring->adev->asic_type == CHIP_TOPAZ ||
4767             ring->adev->asic_type == CHIP_TONGA ||
4768             ring->adev->asic_type == CHIP_FIJI)
4769                 /* VI (Topaz/Tonga/Fiji) has a hw semaphore bug, return false to fall back to the sw fence wait */
4770                 return false;
4771         else {
4772                 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4773                 amdgpu_ring_write(ring, lower_32_bits(addr));
4774                 amdgpu_ring_write(ring, upper_32_bits(addr));
4775                 amdgpu_ring_write(ring, sel);
4776         }
4777
4778         if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4779                 /* Prevent the PFP from running ahead of the semaphore wait */
4780                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4781                 amdgpu_ring_write(ring, 0x0);
4782         }
4783
4784         return true;
4785 }
4786
4787 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4788                                         unsigned vm_id, uint64_t pd_addr)
4789 {
4790         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4791         uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
4792         uint64_t addr = ring->fence_drv.gpu_addr;
4793
4794         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4795         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4796                  WAIT_REG_MEM_FUNCTION(3))); /* equal */
4797         amdgpu_ring_write(ring, addr & 0xfffffffc);
4798         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4799         amdgpu_ring_write(ring, seq);
4800         amdgpu_ring_write(ring, 0xffffffff);
4801         amdgpu_ring_write(ring, 4); /* poll interval */
4802
4803         if (usepfp) {
4804                 /* sync CE with ME to prevent the CE from fetching the CEIB before the context switch is done */
4805                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4806                 amdgpu_ring_write(ring, 0);
4807                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4808                 amdgpu_ring_write(ring, 0);
4809         }
4810
4811         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4812         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4813                                  WRITE_DATA_DST_SEL(0)) |
4814                                  WR_CONFIRM);
4815         if (vm_id < 8) {
4816                 amdgpu_ring_write(ring,
4817                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4818         } else {
4819                 amdgpu_ring_write(ring,
4820                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4821         }
4822         amdgpu_ring_write(ring, 0);
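             /* the page table base registers take the PD address in 4KB units, hence the >> 12 */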
4823         amdgpu_ring_write(ring, pd_addr >> 12);
4824
4825         /* bits 0-15 are the VM contexts 0-15 */
4826         /* invalidate the cache */
4827         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4828         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4829                                  WRITE_DATA_DST_SEL(0)));
4830         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4831         amdgpu_ring_write(ring, 0);
4832         amdgpu_ring_write(ring, 1 << vm_id);
4833
4834         /* wait for the invalidate to complete */
4835         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4836         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4837                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4838                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4839         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4840         amdgpu_ring_write(ring, 0);
4841         amdgpu_ring_write(ring, 0); /* ref */
4842         amdgpu_ring_write(ring, 0); /* mask */
4843         amdgpu_ring_write(ring, 0x20); /* poll interval */
4844
4845         /* compute doesn't have PFP */
4846         if (usepfp) {
4847                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4848                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4849                 amdgpu_ring_write(ring, 0x0);
4850                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4851                 amdgpu_ring_write(ring, 0);
4852                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4853                 amdgpu_ring_write(ring, 0);
4854         }
4855 }
4856
4857 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4858 {
4859         return ring->adev->wb.wb[ring->rptr_offs];
4860 }
4861
4862 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4863 {
4864         return ring->adev->wb.wb[ring->wptr_offs];
4865 }
4866
4867 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4868 {
4869         struct amdgpu_device *adev = ring->adev;
4870
4871         /* XXX check if swapping is necessary on BE */
4872         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4873         WDOORBELL32(ring->doorbell_index, ring->wptr);
4874 }
4875
4876 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4877                                              u64 addr, u64 seq,
4878                                              unsigned flags)
4879 {
4880         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4881         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4882
4883         /* RELEASE_MEM - flush caches, send int */
4884         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4885         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4886                                  EOP_TC_ACTION_EN |
4887                                  EOP_TC_WB_ACTION_EN |
4888                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4889                                  EVENT_INDEX(5)));
4890         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4891         amdgpu_ring_write(ring, addr & 0xfffffffc);
4892         amdgpu_ring_write(ring, upper_32_bits(addr));
4893         amdgpu_ring_write(ring, lower_32_bits(seq));
4894         amdgpu_ring_write(ring, upper_32_bits(seq));
4895 }
4896
4897 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4898                                                  enum amdgpu_interrupt_state state)
4899 {
4900         u32 cp_int_cntl;
4901
4902         switch (state) {
4903         case AMDGPU_IRQ_STATE_DISABLE:
4904                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4905                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4906                                             TIME_STAMP_INT_ENABLE, 0);
4907                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4908                 break;
4909         case AMDGPU_IRQ_STATE_ENABLE:
4910                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4911                 cp_int_cntl =
4912                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4913                                       TIME_STAMP_INT_ENABLE, 1);
4914                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4915                 break;
4916         default:
4917                 break;
4918         }
4919 }
4920
4921 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4922                                                      int me, int pipe,
4923                                                      enum amdgpu_interrupt_state state)
4924 {
4925         u32 mec_int_cntl, mec_int_cntl_reg;
4926
4927         /*
4928          * amdgpu controls only pipe 0 of MEC1. That's why this function only
4929          * handles the setting of interrupts for this specific pipe. All other
4930          * pipes' interrupts are set by amdkfd.
4931          */
4932
4933         if (me == 1) {
4934                 switch (pipe) {
4935                 case 0:
4936                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4937                         break;
4938                 default:
4939                         DRM_DEBUG("invalid pipe %d\n", pipe);
4940                         return;
4941                 }
4942         } else {
4943                 DRM_DEBUG("invalid me %d\n", me);
4944                 return;
4945         }
4946
4947         switch (state) {
4948         case AMDGPU_IRQ_STATE_DISABLE:
4949                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4950                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4951                                              TIME_STAMP_INT_ENABLE, 0);
4952                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4953                 break;
4954         case AMDGPU_IRQ_STATE_ENABLE:
4955                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4956                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4957                                              TIME_STAMP_INT_ENABLE, 1);
4958                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4959                 break;
4960         default:
4961                 break;
4962         }
4963 }
4964
4965 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4966                                              struct amdgpu_irq_src *source,
4967                                              unsigned type,
4968                                              enum amdgpu_interrupt_state state)
4969 {
4970         u32 cp_int_cntl;
4971
4972         switch (state) {
4973         case AMDGPU_IRQ_STATE_DISABLE:
4974                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4975                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4976                                             PRIV_REG_INT_ENABLE, 0);
4977                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4978                 break;
4979         case AMDGPU_IRQ_STATE_ENABLE:
4980                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4981                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4982                                             PRIV_REG_INT_ENABLE, 1);
4983                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4984                 break;
4985         default:
4986                 break;
4987         }
4988
4989         return 0;
4990 }
4991
4992 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4993                                               struct amdgpu_irq_src *source,
4994                                               unsigned type,
4995                                               enum amdgpu_interrupt_state state)
4996 {
4997         u32 cp_int_cntl;
4998
4999         switch (state) {
5000         case AMDGPU_IRQ_STATE_DISABLE:
5001                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5002                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5003                                             PRIV_INSTR_INT_ENABLE, 0);
5004                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5005                 break;
5006         case AMDGPU_IRQ_STATE_ENABLE:
5007                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5008                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5009                                             PRIV_INSTR_INT_ENABLE, 1);
5010                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5011                 break;
5012         default:
5013                 break;
5014         }
5015
5016         return 0;
5017 }
5018
5019 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5020                                             struct amdgpu_irq_src *src,
5021                                             unsigned type,
5022                                             enum amdgpu_interrupt_state state)
5023 {
5024         switch (type) {
5025         case AMDGPU_CP_IRQ_GFX_EOP:
5026                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5027                 break;
5028         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5029                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5030                 break;
5031         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5032                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5033                 break;
5034         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5035                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5036                 break;
5037         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5038                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5039                 break;
5040         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5041                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5042                 break;
5043         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5044                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5045                 break;
5046         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5047                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5048                 break;
5049         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5050                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5051                 break;
5052         default:
5053                 break;
5054         }
5055         return 0;
5056 }
5057
5058 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5059                             struct amdgpu_irq_src *source,
5060                             struct amdgpu_iv_entry *entry)
5061 {
5062         int i;
5063         u8 me_id, pipe_id, queue_id;
5064         struct amdgpu_ring *ring;
5065
5066         DRM_DEBUG("IH: CP EOP\n");
5067         me_id = (entry->ring_id & 0x0c) >> 2;
5068         pipe_id = (entry->ring_id & 0x03) >> 0;
5069         queue_id = (entry->ring_id & 0x70) >> 4;
5070
5071         switch (me_id) {
5072         case 0:
5073                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5074                 break;
5075         case 1:
5076         case 2:
5077                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5078                         ring = &adev->gfx.compute_ring[i];
5079                         /* Per-queue interrupt is supported for MEC starting from VI.
5080                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5081                           */
5082                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5083                                 amdgpu_fence_process(ring);
5084                 }
5085                 break;
5086         }
5087         return 0;
5088 }
5089
5090 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5091                                  struct amdgpu_irq_src *source,
5092                                  struct amdgpu_iv_entry *entry)
5093 {
5094         DRM_ERROR("Illegal register access in command stream\n");
5095         schedule_work(&adev->reset_work);
5096         return 0;
5097 }
5098
5099 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5100                                   struct amdgpu_irq_src *source,
5101                                   struct amdgpu_iv_entry *entry)
5102 {
5103         DRM_ERROR("Illegal instruction in command stream\n");
5104         schedule_work(&adev->reset_work);
5105         return 0;
5106 }
5107
5108 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5109         .early_init = gfx_v8_0_early_init,
5110         .late_init = gfx_v8_0_late_init,
5111         .sw_init = gfx_v8_0_sw_init,
5112         .sw_fini = gfx_v8_0_sw_fini,
5113         .hw_init = gfx_v8_0_hw_init,
5114         .hw_fini = gfx_v8_0_hw_fini,
5115         .suspend = gfx_v8_0_suspend,
5116         .resume = gfx_v8_0_resume,
5117         .is_idle = gfx_v8_0_is_idle,
5118         .wait_for_idle = gfx_v8_0_wait_for_idle,
5119         .soft_reset = gfx_v8_0_soft_reset,
5120         .print_status = gfx_v8_0_print_status,
5121         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5122         .set_powergating_state = gfx_v8_0_set_powergating_state,
5123 };
5124
5125 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5126         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5127         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5128         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5129         .parse_cs = NULL,
5130         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5131         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5132         .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5133         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5134         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5135         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5136         .test_ring = gfx_v8_0_ring_test_ring,
5137         .test_ib = gfx_v8_0_ring_test_ib,
5138         .insert_nop = amdgpu_ring_insert_nop,
5139 };
5140
5141 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5142         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5143         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5144         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5145         .parse_cs = NULL,
5146         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5147         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5148         .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5149         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5150         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5151         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5152         .test_ring = gfx_v8_0_ring_test_ring,
5153         .test_ib = gfx_v8_0_ring_test_ib,
5154         .insert_nop = amdgpu_ring_insert_nop,
5155 };
5156
5157 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5158 {
5159         int i;
5160
5161         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5162                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5163
5164         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5165                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5166 }
5167
5168 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5169         .set = gfx_v8_0_set_eop_interrupt_state,
5170         .process = gfx_v8_0_eop_irq,
5171 };
5172
5173 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5174         .set = gfx_v8_0_set_priv_reg_fault_state,
5175         .process = gfx_v8_0_priv_reg_irq,
5176 };
5177
5178 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5179         .set = gfx_v8_0_set_priv_inst_fault_state,
5180         .process = gfx_v8_0_priv_inst_irq,
5181 };
5182
5183 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5184 {
5185         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5186         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5187
5188         adev->gfx.priv_reg_irq.num_types = 1;
5189         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5190
5191         adev->gfx.priv_inst_irq.num_types = 1;
5192         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5193 }
5194
5195 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5196 {
5197         /* init asic gds info */
5198         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5199         adev->gds.gws.total_size = 64;
5200         adev->gds.oa.total_size = 16;
5201
5202         if (adev->gds.mem.total_size == 64 * 1024) {
5203                 adev->gds.mem.gfx_partition_size = 4096;
5204                 adev->gds.mem.cs_partition_size = 4096;
5205
5206                 adev->gds.gws.gfx_partition_size = 4;
5207                 adev->gds.gws.cs_partition_size = 4;
5208
5209                 adev->gds.oa.gfx_partition_size = 4;
5210                 adev->gds.oa.cs_partition_size = 1;
5211         } else {
5212                 adev->gds.mem.gfx_partition_size = 1024;
5213                 adev->gds.mem.cs_partition_size = 1024;
5214
5215                 adev->gds.gws.gfx_partition_size = 16;
5216                 adev->gds.gws.cs_partition_size = 16;
5217
5218                 adev->gds.oa.gfx_partition_size = 4;
5219                 adev->gds.oa.cs_partition_size = 4;
5220         }
5221 }
5222
5223 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5224                 u32 se, u32 sh)
5225 {
5226         u32 mask = 0, tmp, tmp1;
5227         int i;
5228
5229         gfx_v8_0_select_se_sh(adev, se, sh);
5230         tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5231         tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5232         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5233
5234         tmp &= 0xffff0000;
5235
5236         tmp |= tmp1;
5237         tmp >>= 16;
5238
5239         for (i = 0; i < adev->gfx.config.max_cu_per_sh; i++) {
5240                 mask <<= 1;
5241                 mask |= 1;
5242         }
5243
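             /*
              * The shader array config registers appear to report disabled CUs
              * in their upper 16 bits, so after OR-ing them and shifting down,
              * inverting and masking to max_cu_per_sh gives the bitmap of
              * active CUs for the selected SE/SH.
              */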
5244         return (~tmp) & mask;
5245 }
5246
5247 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5248                                                  struct amdgpu_cu_info *cu_info)
5249 {
5250         int i, j, k, counter, active_cu_number = 0;
5251         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5252
5253         if (!adev || !cu_info)
5254                 return -EINVAL;
5255
5256         mutex_lock(&adev->grbm_idx_mutex);
5257         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5258                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5259                         mask = 1;
5260                         ao_bitmap = 0;
5261                         counter = 0;
5262                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
5263                         cu_info->bitmap[i][j] = bitmap;
5264
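                             /*
                              * Count the active CUs in this SH and treat the first two
                              * of them as "always on" (AO); ao_cu_mask packs 8 bits per
                              * SH and 16 bits per SE.
                              */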
5265                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5266                                 if (bitmap & mask) {
5267                                         if (counter < 2)
5268                                                 ao_bitmap |= mask;
5269                                         counter++;
5270                                 }
5271                                 mask <<= 1;
5272                         }
5273                         active_cu_number += counter;
5274                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5275                 }
5276         }
5277
5278         cu_info->number = active_cu_number;
5279         cu_info->ao_cu_mask = ao_cu_mask;
5280         mutex_unlock(&adev->grbm_idx_mutex);
5281         return 0;
5282 }