]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
89d7b1576a66754a05c7c52d80d08359f7ca8c39
[karo-tx-linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define BAFFIN_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
75 /* BPM SERDES CMD */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
79 /* BPM register addresses: consecutive values starting at 0; BPM_REG_FGCG_MAX (5) equals the number of addresses listed before it. */
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX
87 };
88
89 #define RLC_FormatDirectRegListLength        14
90
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 MODULE_FIRMWARE("amdgpu/baffin_ce.bin");
125 MODULE_FIRMWARE("amdgpu/baffin_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/baffin_me.bin");
127 MODULE_FIRMWARE("amdgpu/baffin_mec.bin");
128 MODULE_FIRMWARE("amdgpu/baffin_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/baffin_rlc.bin");
130
131 MODULE_FIRMWARE("amdgpu/ellesmere_ce.bin");
132 MODULE_FIRMWARE("amdgpu/ellesmere_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/ellesmere_me.bin");
134 MODULE_FIRMWARE("amdgpu/ellesmere_mec.bin");
135 MODULE_FIRMWARE("amdgpu/ellesmere_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/ellesmere_rlc.bin");
137
/*
 * Per-VMID GDS register offsets.  There is one row for each of VMID 0..15,
 * and every row lists that VMID's BASE, SIZE, GWS and OA register, in that
 * order.  The array index is the VMID number.
 */
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
139 {
140         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
156 };
157
/*
 * Tonga "a11" golden register settings.
 * Each row holds three u32 values: a register offset (mm*) followed by two
 * 32-bit constants.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
158 static const u32 golden_settings_tonga_a11[] =
159 {
160         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163         mmGB_GPU_ID, 0x0000000f, 0x00000000,
164         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
175 };
176
/*
 * Tonga common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants; every row here uses 0xffffffff as the
 * first constant).  The mmGB_ADDR_CONFIG row carries 0x22011003, the same
 * value as TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
177 static const u32 tonga_golden_common_all[] =
178 {
179         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
187 };
188
/*
 * Tonga MGCG/CGCG init table, three u32 values per row (register offset
 * followed by two 32-bit constants).  Row order: mmRLC_CGTT_MGCG_OVERRIDE,
 * mmGRBM_GFX_INDEX, the *_CGTT_* / CGTT_*_CLK_CTRL registers, mmGRBM_GFX_INDEX
 * again, the per-CU mmCGTS_CU0..CU7 registers, then mmCGTS_SM_CTRL_REG,
 * mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and mmCP_MEM_SLP_CNTL.
 * CU0 and CU4 list a *_TA_SQC_CTRL_REG where the other CUs list *_TA_CTRL_REG.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
189 static const u32 tonga_mgcg_cgcg_init[] =
190 {
191         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
266 };
267
/*
 * Baffin "a11" golden register settings.
 * Each row holds three u32 values: a register offset (mm*) followed by two
 * 32-bit constants.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
268 static const u32 golden_settings_baffin_a11[] =
269 {
270         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
271         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
272         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
273         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
274         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
276         mmSQ_CONFIG, 0x07f80000, 0x07180000,
277         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
278         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
279         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
280         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
281         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
282 };
283
/*
 * Baffin common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants; every row here uses 0xffffffff as the
 * first constant).  The mmGB_ADDR_CONFIG row carries 0x22011002, the same
 * value as BAFFIN_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
284 static const u32 baffin_golden_common_all[] =
285 {
286         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
287         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
288         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
289         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
290         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
291         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
292         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
293         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
294 };
295
/*
 * Ellesmere "a11" golden register settings.
 * Each row holds three u32 values: a register offset (mm*) followed by two
 * 32-bit constants.  This table lists mmTCP_CHAN_STEER_HI but has no
 * mmTCP_CHAN_STEER_LO row.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
296 static const u32 golden_settings_ellesmere_a11[] =
297 {
298         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
299         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
300         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
301         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
302         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
303         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
304         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
305         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
306         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
307         mmSQ_CONFIG, 0x07f80000, 0x07180000,
308         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
309         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
310         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
311         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
312 };
313
/*
 * Ellesmere common golden settings, three u32 values per row (register
 * offset followed by two 32-bit constants; every row here uses 0xffffffff
 * as the first constant).
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
314 static const u32 ellesmere_golden_common_all[] =
315 {
316         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
317         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
318         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
319         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
320         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
321         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
322         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
323         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
324 };
325
/*
 * Fiji common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants).  Passed to
 * amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers().  After the SPI_RESOURCE_RESERVE rows this
 * table lists mmGRBM_GFX_INDEX a second time, followed by
 * mmSPI_CONFIG_CNTL_1.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
326 static const u32 fiji_golden_common_all[] =
327 {
328         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
329         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
330         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
331         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
332         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
335         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
336         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
337         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
338 };
339
/*
 * Fiji "a10" golden register settings, three u32 values per row (register
 * offset followed by two 32-bit constants).  Passed to
 * amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers(), after fiji_mgcg_cgcg_init.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
340 static const u32 golden_settings_fiji_a10[] =
341 {
342         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
343         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
344         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
345         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
346         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
347         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
348         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
351         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
352         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
353 };
354
/*
 * Fiji MGCG/CGCG init table, three u32 values per row (register offset
 * followed by two 32-bit constants).  It is the first table passed to
 * amdgpu_program_register_sequence() in the CHIP_FIJI case of
 * gfx_v8_0_init_golden_registers().  Unlike the Tonga, Iceland and CZ
 * variants, this table contains no per-CU mmCGTS_CU* rows.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
355 static const u32 fiji_mgcg_cgcg_init[] =
356 {
357         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
358         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
360         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
362         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
364         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
366         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
368         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
375         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
379         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
382         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
383         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
384         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
385         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
386         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
387         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
388         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
389         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
390         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
391         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
392 };
393
/*
 * Iceland "a11" golden register settings, three u32 values per row (register
 * offset followed by two 32-bit constants).  Passed to
 * amdgpu_program_register_sequence() in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers(), after iceland_mgcg_cgcg_init and before
 * iceland_golden_common_all.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
394 static const u32 golden_settings_iceland_a11[] =
395 {
396         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
397         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
398         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
399         mmGB_GPU_ID, 0x0000000f, 0x00000000,
400         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
401         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
402         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
403         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
404         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
405         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
406         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
407         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
408         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
409         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
410         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
411 };
412
/*
 * Iceland common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants).  It is the last of the three tables
 * passed to amdgpu_program_register_sequence() in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().  The mmGB_ADDR_CONFIG row carries
 * 0x22010001, the same value as TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
413 static const u32 iceland_golden_common_all[] =
414 {
415         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
416         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
417         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
418         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
419         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
420         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
421         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
422         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
423 };
424
/*
 * Iceland MGCG/CGCG init table, three u32 values per row (register offset
 * followed by two 32-bit constants).  It is the first table passed to
 * amdgpu_program_register_sequence() in the CHIP_TOPAZ case of
 * gfx_v8_0_init_golden_registers().  The per-CU section covers
 * mmCGTS_CU0..CU5 only, and the table ends at mmRLC_CGCG_CGLS_CTRL (there is
 * no mmCP_MEM_SLP_CNTL row).
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
425 static const u32 iceland_mgcg_cgcg_init[] =
426 {
427         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
428         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
429         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
430         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
431         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
432         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
433         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
434         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
436         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
438         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
445         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
449         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
450         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
452         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
453         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
454         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
455         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
458         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
459         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
460         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
461         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
462         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
463         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
464         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
465         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
466         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
467         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
468         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
471         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
481         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
486         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
489         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
490         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
491 };
492
/*
 * CZ (presumably Carrizo -- confirm) "a11" golden register settings.
 * Each row holds three u32 values: a register offset (mm*) followed by two
 * 32-bit constants.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().  If so,
 * the mmTCP_ADDR_CONFIG row's value (0xf3) has bits set outside its mask
 * (0xf); verify that this is intended.
 */
493 static const u32 cz_golden_settings_a11[] =
494 {
495         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
496         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
497         mmGB_GPU_ID, 0x0000000f, 0x00000000,
498         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
499         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
500         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
501         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
502         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
503         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
504         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
505 };
506
/*
 * CZ common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants; every row here uses 0xffffffff as the
 * first constant).  The mmGB_ADDR_CONFIG row carries 0x22010001, the same
 * value as CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
507 static const u32 cz_golden_common_all[] =
508 {
509         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
510         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
511         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
512         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
513         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
514         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
515         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
516         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
517 };
518
/*
 * CZ MGCG/CGCG init table, three u32 values per row (register offset
 * followed by two 32-bit constants).  Row order: mmRLC_CGTT_MGCG_OVERRIDE,
 * mmGRBM_GFX_INDEX, the *_CGTT_* / CGTT_*_CLK_CTRL registers, mmGRBM_GFX_INDEX
 * again, the per-CU mmCGTS_CU0..CU7 registers, then mmCGTS_SM_CTRL_REG,
 * mmCP_RB_WPTR_POLL_CNTL, mmRLC_CGCG_CGLS_CTRL and mmCP_MEM_SLP_CNTL.
 * CU0 and CU4 list a *_TA_SQC_CTRL_REG where the other CUs list *_TA_CTRL_REG.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
519 static const u32 cz_mgcg_cgcg_init[] =
520 {
521         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
522         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
523         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
524         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
526         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
532         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
539         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
540         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
541         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
542         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
543         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
544         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
546         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
547         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
548         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
549         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
550         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
551         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
552         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
555         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
563         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
564         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
565         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
566         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
567         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
568         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
569         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
570         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
571         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
572         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
573         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
574         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
575         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
576         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
577         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
578         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
579         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
580         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
581         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
582         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
583         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
584         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
585         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
586         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
587         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
588         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
589         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
590         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
591         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
592         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
593         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
594         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
595         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
596 };
597
/*
 * Stoney "a11" golden register settings.
 * Each row holds three u32 values: a register offset (mm*) followed by two
 * 32-bit constants.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().  If so,
 * the mmTCP_ADDR_CONFIG row's value (0xf1) has bits set outside its mask
 * (0xf); verify that this is intended.
 */
598 static const u32 stoney_golden_settings_a11[] =
599 {
600         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
601         mmGB_GPU_ID, 0x0000000f, 0x00000000,
602         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
603         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
604         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
605         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
606         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
607         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
608         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
609         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
610 };
611
/*
 * Stoney common golden settings, three u32 values per row (register offset
 * followed by two 32-bit constants; every row here uses 0xffffffff as the
 * first constant).  The mmGB_ADDR_CONFIG row carries 0x12010001.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
612 static const u32 stoney_golden_common_all[] =
613 {
614         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
615         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
616         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
617         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
618         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
619         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
620         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
621         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
622 };
623
/*
 * Stoney MGCG/CGCG init table, three u32 values per row (register offset
 * followed by two 32-bit constants).  It has only six rows: it contains
 * none of the CGTT_*_CLK_CTRL or per-CU mmCGTS_CU* rows found in the other
 * chips' *_mgcg_cgcg_init tables, and it adds mmRLC_MEM_SLP_CNTL and
 * mmATC_MISC_CG.
 * NOTE(review): the constants look like an AND mask and the value to apply
 * under it -- confirm against amdgpu_program_register_sequence().
 */
624 static const u32 stoney_mgcg_cgcg_init[] =
625 {
626         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
628         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
629         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
630         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
631         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
632 };
633
/*
 * Prototypes for file-local functions that are referenced before their
 * definitions.  Of these, gfx_v8_0_get_csb_size() is called by
 * gfx_v8_0_rlc_init() below to size the clear state buffer; the callers of
 * the gfx_v8_0_set_*() functions are outside this part of the file.
 */
634 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
635 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
636 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
637 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
638 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
639
640 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
641 {
642         switch (adev->asic_type) {
643         case CHIP_TOPAZ:
644                 amdgpu_program_register_sequence(adev,
645                                                  iceland_mgcg_cgcg_init,
646                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
647                 amdgpu_program_register_sequence(adev,
648                                                  golden_settings_iceland_a11,
649                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
650                 amdgpu_program_register_sequence(adev,
651                                                  iceland_golden_common_all,
652                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
653                 break;
654         case CHIP_FIJI:
655                 amdgpu_program_register_sequence(adev,
656                                                  fiji_mgcg_cgcg_init,
657                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
658                 amdgpu_program_register_sequence(adev,
659                                                  golden_settings_fiji_a10,
660                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
661                 amdgpu_program_register_sequence(adev,
662                                                  fiji_golden_common_all,
663                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
664                 break;
665
666         case CHIP_TONGA:
667                 amdgpu_program_register_sequence(adev,
668                                                  tonga_mgcg_cgcg_init,
669                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
670                 amdgpu_program_register_sequence(adev,
671                                                  golden_settings_tonga_a11,
672                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
673                 amdgpu_program_register_sequence(adev,
674                                                  tonga_golden_common_all,
675                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
676                 break;
677         case CHIP_BAFFIN:
678                 amdgpu_program_register_sequence(adev,
679                                                  golden_settings_baffin_a11,
680                                                  (const u32)ARRAY_SIZE(golden_settings_baffin_a11));
681                 amdgpu_program_register_sequence(adev,
682                                                  baffin_golden_common_all,
683                                                  (const u32)ARRAY_SIZE(baffin_golden_common_all));
684                 break;
685         case CHIP_ELLESMERE:
686                 amdgpu_program_register_sequence(adev,
687                                                  golden_settings_ellesmere_a11,
688                                                  (const u32)ARRAY_SIZE(golden_settings_ellesmere_a11));
689                 amdgpu_program_register_sequence(adev,
690                                                  ellesmere_golden_common_all,
691                                                  (const u32)ARRAY_SIZE(ellesmere_golden_common_all));
692                 break;
693         case CHIP_CARRIZO:
694                 amdgpu_program_register_sequence(adev,
695                                                  cz_mgcg_cgcg_init,
696                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
697                 amdgpu_program_register_sequence(adev,
698                                                  cz_golden_settings_a11,
699                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
700                 amdgpu_program_register_sequence(adev,
701                                                  cz_golden_common_all,
702                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
703                 break;
704         case CHIP_STONEY:
705                 amdgpu_program_register_sequence(adev,
706                                                  stoney_mgcg_cgcg_init,
707                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
708                 amdgpu_program_register_sequence(adev,
709                                                  stoney_golden_settings_a11,
710                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
711                 amdgpu_program_register_sequence(adev,
712                                                  stoney_golden_common_all,
713                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
714                 break;
715         default:
716                 break;
717         }
718 }
719
720 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
721 {
722         int i;
723
724         adev->gfx.scratch.num_reg = 7;
725         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
726         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
727                 adev->gfx.scratch.free[i] = true;
728                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
729         }
730 }
731
732 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
733 {
734         struct amdgpu_device *adev = ring->adev;
735         uint32_t scratch;
736         uint32_t tmp = 0;
737         unsigned i;
738         int r;
739
740         r = amdgpu_gfx_scratch_get(adev, &scratch);
741         if (r) {
742                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
743                 return r;
744         }
745         WREG32(scratch, 0xCAFEDEAD);
746         r = amdgpu_ring_alloc(ring, 3);
747         if (r) {
748                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
749                           ring->idx, r);
750                 amdgpu_gfx_scratch_free(adev, scratch);
751                 return r;
752         }
753         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
754         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
755         amdgpu_ring_write(ring, 0xDEADBEEF);
756         amdgpu_ring_commit(ring);
757
758         for (i = 0; i < adev->usec_timeout; i++) {
759                 tmp = RREG32(scratch);
760                 if (tmp == 0xDEADBEEF)
761                         break;
762                 DRM_UDELAY(1);
763         }
764         if (i < adev->usec_timeout) {
765                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
766                          ring->idx, i);
767         } else {
768                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
769                           ring->idx, scratch, tmp);
770                 r = -EINVAL;
771         }
772         amdgpu_gfx_scratch_free(adev, scratch);
773         return r;
774 }
775
776 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
777 {
778         struct amdgpu_device *adev = ring->adev;
779         struct amdgpu_ib ib;
780         struct fence *f = NULL;
781         uint32_t scratch;
782         uint32_t tmp = 0;
783         unsigned i;
784         int r;
785
786         r = amdgpu_gfx_scratch_get(adev, &scratch);
787         if (r) {
788                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
789                 return r;
790         }
791         WREG32(scratch, 0xCAFEDEAD);
792         memset(&ib, 0, sizeof(ib));
793         r = amdgpu_ib_get(adev, NULL, 256, &ib);
794         if (r) {
795                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
796                 goto err1;
797         }
798         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
799         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
800         ib.ptr[2] = 0xDEADBEEF;
801         ib.length_dw = 3;
802
803         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
804         if (r)
805                 goto err2;
806
807         r = fence_wait(f, false);
808         if (r) {
809                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
810                 goto err2;
811         }
812         for (i = 0; i < adev->usec_timeout; i++) {
813                 tmp = RREG32(scratch);
814                 if (tmp == 0xDEADBEEF)
815                         break;
816                 DRM_UDELAY(1);
817         }
818         if (i < adev->usec_timeout) {
819                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
820                          ring->idx, i);
821                 goto err2;
822         } else {
823                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
824                           scratch, tmp);
825                 r = -EINVAL;
826         }
827 err2:
828         fence_put(f);
829         amdgpu_ib_free(adev, &ib, NULL);
830         fence_put(f);
831 err1:
832         amdgpu_gfx_scratch_free(adev, scratch);
833         return r;
834 }
835
836 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
837 {
838         const char *chip_name;
839         char fw_name[30];
840         int err;
841         struct amdgpu_firmware_info *info = NULL;
842         const struct common_firmware_header *header = NULL;
843         const struct gfx_firmware_header_v1_0 *cp_hdr;
844         const struct rlc_firmware_header_v2_0 *rlc_hdr;
845         unsigned int *tmp = NULL, i;
846
847         DRM_DEBUG("\n");
848
849         switch (adev->asic_type) {
850         case CHIP_TOPAZ:
851                 chip_name = "topaz";
852                 break;
853         case CHIP_TONGA:
854                 chip_name = "tonga";
855                 break;
856         case CHIP_CARRIZO:
857                 chip_name = "carrizo";
858                 break;
859         case CHIP_FIJI:
860                 chip_name = "fiji";
861                 break;
862         case CHIP_BAFFIN:
863                 chip_name = "baffin";
864                 break;
865         case CHIP_ELLESMERE:
866                 chip_name = "ellesmere";
867                 break;
868         case CHIP_STONEY:
869                 chip_name = "stoney";
870                 break;
871         default:
872                 BUG();
873         }
874
875         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
876         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
877         if (err)
878                 goto out;
879         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
880         if (err)
881                 goto out;
882         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
883         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
884         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
885
886         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
887         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
888         if (err)
889                 goto out;
890         err = amdgpu_ucode_validate(adev->gfx.me_fw);
891         if (err)
892                 goto out;
893         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
894         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
895         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
896
897         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
898         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
899         if (err)
900                 goto out;
901         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
902         if (err)
903                 goto out;
904         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
905         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
906         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
907
908         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
909         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
910         if (err)
911                 goto out;
912         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
913         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
914         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
915         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
916
917         adev->gfx.rlc.save_and_restore_offset =
918                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
919         adev->gfx.rlc.clear_state_descriptor_offset =
920                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
921         adev->gfx.rlc.avail_scratch_ram_locations =
922                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
923         adev->gfx.rlc.reg_restore_list_size =
924                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
925         adev->gfx.rlc.reg_list_format_start =
926                         le32_to_cpu(rlc_hdr->reg_list_format_start);
927         adev->gfx.rlc.reg_list_format_separate_start =
928                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
929         adev->gfx.rlc.starting_offsets_start =
930                         le32_to_cpu(rlc_hdr->starting_offsets_start);
931         adev->gfx.rlc.reg_list_format_size_bytes =
932                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
933         adev->gfx.rlc.reg_list_size_bytes =
934                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
935
936         adev->gfx.rlc.register_list_format =
937                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
938                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
939
940         if (!adev->gfx.rlc.register_list_format) {
941                 err = -ENOMEM;
942                 goto out;
943         }
944
945         tmp = (unsigned int *)((uint64_t)rlc_hdr +
946                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
947         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
948                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
949
950         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
951
952         tmp = (unsigned int *)((uint64_t)rlc_hdr +
953                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
954         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
955                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
956
957         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
958         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
959         if (err)
960                 goto out;
961         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
962         if (err)
963                 goto out;
964         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
965         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
966         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
967
968         if ((adev->asic_type != CHIP_STONEY) &&
969             (adev->asic_type != CHIP_TOPAZ)) {
970                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
971                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
972                 if (!err) {
973                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
974                         if (err)
975                                 goto out;
976                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
977                                 adev->gfx.mec2_fw->data;
978                         adev->gfx.mec2_fw_version =
979                                 le32_to_cpu(cp_hdr->header.ucode_version);
980                         adev->gfx.mec2_feature_version =
981                                 le32_to_cpu(cp_hdr->ucode_feature_version);
982                 } else {
983                         err = 0;
984                         adev->gfx.mec2_fw = NULL;
985                 }
986         }
987
988         if (adev->firmware.smu_load) {
989                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
990                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
991                 info->fw = adev->gfx.pfp_fw;
992                 header = (const struct common_firmware_header *)info->fw->data;
993                 adev->firmware.fw_size +=
994                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
995
996                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
997                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
998                 info->fw = adev->gfx.me_fw;
999                 header = (const struct common_firmware_header *)info->fw->data;
1000                 adev->firmware.fw_size +=
1001                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1002
1003                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1004                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1005                 info->fw = adev->gfx.ce_fw;
1006                 header = (const struct common_firmware_header *)info->fw->data;
1007                 adev->firmware.fw_size +=
1008                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1009
1010                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1011                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1012                 info->fw = adev->gfx.rlc_fw;
1013                 header = (const struct common_firmware_header *)info->fw->data;
1014                 adev->firmware.fw_size +=
1015                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1016
1017                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1018                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1019                 info->fw = adev->gfx.mec_fw;
1020                 header = (const struct common_firmware_header *)info->fw->data;
1021                 adev->firmware.fw_size +=
1022                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1023
1024                 if (adev->gfx.mec2_fw) {
1025                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1026                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1027                         info->fw = adev->gfx.mec2_fw;
1028                         header = (const struct common_firmware_header *)info->fw->data;
1029                         adev->firmware.fw_size +=
1030                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1031                 }
1032
1033         }
1034
1035 out:
1036         if (err) {
1037                 dev_err(adev->dev,
1038                         "gfx8: Failed to load firmware \"%s\"\n",
1039                         fw_name);
1040                 release_firmware(adev->gfx.pfp_fw);
1041                 adev->gfx.pfp_fw = NULL;
1042                 release_firmware(adev->gfx.me_fw);
1043                 adev->gfx.me_fw = NULL;
1044                 release_firmware(adev->gfx.ce_fw);
1045                 adev->gfx.ce_fw = NULL;
1046                 release_firmware(adev->gfx.rlc_fw);
1047                 adev->gfx.rlc_fw = NULL;
1048                 release_firmware(adev->gfx.mec_fw);
1049                 adev->gfx.mec_fw = NULL;
1050                 release_firmware(adev->gfx.mec2_fw);
1051                 adev->gfx.mec2_fw = NULL;
1052         }
1053         return err;
1054 }
1055
1056 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1057                                     volatile u32 *buffer)
1058 {
1059         u32 count = 0, i;
1060         const struct cs_section_def *sect = NULL;
1061         const struct cs_extent_def *ext = NULL;
1062
1063         if (adev->gfx.rlc.cs_data == NULL)
1064                 return;
1065         if (buffer == NULL)
1066                 return;
1067
1068         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1069         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1070
1071         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1072         buffer[count++] = cpu_to_le32(0x80000000);
1073         buffer[count++] = cpu_to_le32(0x80000000);
1074
1075         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1076                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1077                         if (sect->id == SECT_CONTEXT) {
1078                                 buffer[count++] =
1079                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1080                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1081                                                 PACKET3_SET_CONTEXT_REG_START);
1082                                 for (i = 0; i < ext->reg_count; i++)
1083                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1084                         } else {
1085                                 return;
1086                         }
1087                 }
1088         }
1089
1090         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1091         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1092                         PACKET3_SET_CONTEXT_REG_START);
1093         switch (adev->asic_type) {
1094         case CHIP_TONGA:
1095                 buffer[count++] = cpu_to_le32(0x16000012);
1096                 buffer[count++] = cpu_to_le32(0x0000002A);
1097                 break;
1098         case CHIP_FIJI:
1099                 buffer[count++] = cpu_to_le32(0x3a00161a);
1100                 buffer[count++] = cpu_to_le32(0x0000002e);
1101                 break;
1102         case CHIP_TOPAZ:
1103         case CHIP_CARRIZO:
1104                 buffer[count++] = cpu_to_le32(0x00000002);
1105                 buffer[count++] = cpu_to_le32(0x00000000);
1106                 break;
1107         case CHIP_STONEY:
1108                 buffer[count++] = cpu_to_le32(0x00000000);
1109                 buffer[count++] = cpu_to_le32(0x00000000);
1110                 break;
1111         default:
1112                 buffer[count++] = cpu_to_le32(0x00000000);
1113                 buffer[count++] = cpu_to_le32(0x00000000);
1114                 break;
1115         }
1116
1117         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1118         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1119
1120         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1121         buffer[count++] = cpu_to_le32(0);
1122 }
1123
1124 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1125 {
1126         int r;
1127
1128         /* clear state block */
1129         if (adev->gfx.rlc.clear_state_obj) {
1130                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1131                 if (unlikely(r != 0))
1132                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1133                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1134                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1135
1136                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1137                 adev->gfx.rlc.clear_state_obj = NULL;
1138         }
1139 }
1140
1141 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1142 {
1143         volatile u32 *dst_ptr;
1144         u32 dws;
1145         const struct cs_section_def *cs_data;
1146         int r;
1147
1148         adev->gfx.rlc.cs_data = vi_cs_data;
1149
1150         cs_data = adev->gfx.rlc.cs_data;
1151
1152         if (cs_data) {
1153                 /* clear state block */
1154                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1155
1156                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1157                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1158                                              AMDGPU_GEM_DOMAIN_VRAM,
1159                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1160                                              NULL, NULL,
1161                                              &adev->gfx.rlc.clear_state_obj);
1162                         if (r) {
1163                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1164                                 gfx_v8_0_rlc_fini(adev);
1165                                 return r;
1166                         }
1167                 }
1168                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1169                 if (unlikely(r != 0)) {
1170                         gfx_v8_0_rlc_fini(adev);
1171                         return r;
1172                 }
1173                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1174                                   &adev->gfx.rlc.clear_state_gpu_addr);
1175                 if (r) {
1176                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1177                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1178                         gfx_v8_0_rlc_fini(adev);
1179                         return r;
1180                 }
1181
1182                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1183                 if (r) {
1184                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1185                         gfx_v8_0_rlc_fini(adev);
1186                         return r;
1187                 }
1188                 /* set up the cs buffer */
1189                 dst_ptr = adev->gfx.rlc.cs_ptr;
1190                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1191                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1192                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1193         }
1194
1195         return 0;
1196 }
1197
1198 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1199 {
1200         int r;
1201
1202         if (adev->gfx.mec.hpd_eop_obj) {
1203                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1204                 if (unlikely(r != 0))
1205                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1206                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1207                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1208
1209                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1210                 adev->gfx.mec.hpd_eop_obj = NULL;
1211         }
1212 }
1213
1214 #define MEC_HPD_SIZE 2048
1215
1216 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1217 {
1218         int r;
1219         u32 *hpd;
1220
1221         /*
1222          * we assign only 1 pipe because all other pipes will
1223          * be handled by KFD
1224          */
1225         adev->gfx.mec.num_mec = 1;
1226         adev->gfx.mec.num_pipe = 1;
1227         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1228
1229         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1230                 r = amdgpu_bo_create(adev,
1231                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1232                                      PAGE_SIZE, true,
1233                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1234                                      &adev->gfx.mec.hpd_eop_obj);
1235                 if (r) {
1236                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1237                         return r;
1238                 }
1239         }
1240
1241         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1242         if (unlikely(r != 0)) {
1243                 gfx_v8_0_mec_fini(adev);
1244                 return r;
1245         }
1246         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1247                           &adev->gfx.mec.hpd_eop_gpu_addr);
1248         if (r) {
1249                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1250                 gfx_v8_0_mec_fini(adev);
1251                 return r;
1252         }
1253         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1254         if (r) {
1255                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1256                 gfx_v8_0_mec_fini(adev);
1257                 return r;
1258         }
1259
1260         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1261
1262         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1263         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1264
1265         return 0;
1266 }
1267
/*
 * Opaque table of 66 32-bit words.
 *
 * NOTE(review): judging by the name this is the machine code of a compute
 * shader used to initialise the VGPRs; the code that consumes the table is
 * not part of this section of the file - confirm there before editing any
 * word.
 */
1268 static const u32 vgpr_init_compute_shader[] =
1269 {
1270         0x7e000209, 0x7e020208,
1271         0x7e040207, 0x7e060206,
1272         0x7e080205, 0x7e0a0204,
1273         0x7e0c0203, 0x7e0e0202,
1274         0x7e100201, 0x7e120200,
1275         0x7e140209, 0x7e160208,
1276         0x7e180207, 0x7e1a0206,
1277         0x7e1c0205, 0x7e1e0204,
1278         0x7e200203, 0x7e220202,
1279         0x7e240201, 0x7e260200,
1280         0x7e280209, 0x7e2a0208,
1281         0x7e2c0207, 0x7e2e0206,
1282         0x7e300205, 0x7e320204,
1283         0x7e340203, 0x7e360202,
1284         0x7e380201, 0x7e3a0200,
1285         0x7e3c0209, 0x7e3e0208,
1286         0x7e400207, 0x7e420206,
1287         0x7e440205, 0x7e460204,
1288         0x7e480203, 0x7e4a0202,
1289         0x7e4c0201, 0x7e4e0200,
1290         0x7e500209, 0x7e520208,
1291         0x7e540207, 0x7e560206,
1292         0x7e580205, 0x7e5a0204,
1293         0x7e5c0203, 0x7e5e0202,
1294         0x7e600201, 0x7e620200,
1295         0x7e640209, 0x7e660208,
1296         0x7e680207, 0x7e6a0206,
1297         0x7e6c0205, 0x7e6e0204,
1298         0x7e700203, 0x7e720202,
1299         0x7e740201, 0x7e760200,
1300         0x7e780209, 0x7e7a0208,
1301         0x7e7c0207, 0x7e7e0206,
1302         0xbf8a0000, 0xbf810000,
1303 };
1304
/*
 * Opaque table of 42 32-bit words; the last word is 0x00000000.
 *
 * NOTE(review): judging by the name this is the machine code of a compute
 * shader used to initialise the SGPRs; the code that consumes the table is
 * not part of this section of the file - confirm there before editing any
 * word.
 */
1305 static const u32 sgpr_init_compute_shader[] =
1306 {
1307         0xbe8a0100, 0xbe8c0102,
1308         0xbe8e0104, 0xbe900106,
1309         0xbe920108, 0xbe940100,
1310         0xbe960102, 0xbe980104,
1311         0xbe9a0106, 0xbe9c0108,
1312         0xbe9e0100, 0xbea00102,
1313         0xbea20104, 0xbea40106,
1314         0xbea60108, 0xbea80100,
1315         0xbeaa0102, 0xbeac0104,
1316         0xbeae0106, 0xbeb00108,
1317         0xbeb20100, 0xbeb40102,
1318         0xbeb60104, 0xbeb80106,
1319         0xbeba0108, 0xbebc0100,
1320         0xbebe0102, 0xbec00104,
1321         0xbec20106, 0xbec40108,
1322         0xbec60100, 0xbec80102,
1323         0xbee60004, 0xbee70005,
1324         0xbeea0006, 0xbeeb0007,
1325         0xbee80008, 0xbee90009,
1326         0xbefc0000, 0xbf8a0000,
1327         0xbf810000, 0x00000000,
1328 };
1329
1330 static const u32 vgpr_init_regs[] =
1331 {
1332         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1333         mmCOMPUTE_RESOURCE_LIMITS, 0,
1334         mmCOMPUTE_NUM_THREAD_X, 256*4,
1335         mmCOMPUTE_NUM_THREAD_Y, 1,
1336         mmCOMPUTE_NUM_THREAD_Z, 1,
1337         mmCOMPUTE_PGM_RSRC2, 20,
1338         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1339         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1340         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1341         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1342         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1343         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1344         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1345         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1346         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1347         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1348 };
1349
1350 static const u32 sgpr1_init_regs[] =
1351 {
1352         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1353         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1354         mmCOMPUTE_NUM_THREAD_X, 256*5,
1355         mmCOMPUTE_NUM_THREAD_Y, 1,
1356         mmCOMPUTE_NUM_THREAD_Z, 1,
1357         mmCOMPUTE_PGM_RSRC2, 20,
1358         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1359         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1360         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1361         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1362         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1363         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1364         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1365         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1366         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1367         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1368 };
1369
1370 static const u32 sgpr2_init_regs[] =
1371 {
1372         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1373         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1374         mmCOMPUTE_NUM_THREAD_X, 256*5,
1375         mmCOMPUTE_NUM_THREAD_Y, 1,
1376         mmCOMPUTE_NUM_THREAD_Z, 1,
1377         mmCOMPUTE_PGM_RSRC2, 20,
1378         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1379         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1380         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1381         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1382         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1383         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1384         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1385         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1386         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1387         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1388 };
1389
1390 static const u32 sec_ded_counter_registers[] =
1391 {
1392         mmCPC_EDC_ATC_CNT,
1393         mmCPC_EDC_SCRATCH_CNT,
1394         mmCPC_EDC_UCODE_CNT,
1395         mmCPF_EDC_ATC_CNT,
1396         mmCPF_EDC_ROQ_CNT,
1397         mmCPF_EDC_TAG_CNT,
1398         mmCPG_EDC_ATC_CNT,
1399         mmCPG_EDC_DMA_CNT,
1400         mmCPG_EDC_TAG_CNT,
1401         mmDC_EDC_CSINVOC_CNT,
1402         mmDC_EDC_RESTORE_CNT,
1403         mmDC_EDC_STATE_CNT,
1404         mmGDS_EDC_CNT,
1405         mmGDS_EDC_GRBM_CNT,
1406         mmGDS_EDC_OA_DED,
1407         mmSPI_EDC_CNT,
1408         mmSQC_ATC_EDC_GATCL1_CNT,
1409         mmSQC_EDC_CNT,
1410         mmSQ_EDC_DED_CNT,
1411         mmSQ_EDC_INFO,
1412         mmSQ_EDC_SEC_CNT,
1413         mmTCC_EDC_CNT,
1414         mmTCP_ATC_EDC_GATCL1_CNT,
1415         mmTCP_EDC_CNT,
1416         mmTD_EDC_CNT
1417 };
1418
1419 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1420 {
1421         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1422         struct amdgpu_ib ib;
1423         struct fence *f = NULL;
1424         int r, i;
1425         u32 tmp;
1426         unsigned total_size, vgpr_offset, sgpr_offset;
1427         u64 gpu_addr;
1428
1429         /* only supported on CZ */
1430         if (adev->asic_type != CHIP_CARRIZO)
1431                 return 0;
1432
1433         /* bail if the compute ring is not ready */
1434         if (!ring->ready)
1435                 return 0;
1436
1437         tmp = RREG32(mmGB_EDC_MODE);
1438         WREG32(mmGB_EDC_MODE, 0);
1439
1440         total_size =
1441                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1442         total_size +=
1443                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1444         total_size +=
1445                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1446         total_size = ALIGN(total_size, 256);
1447         vgpr_offset = total_size;
1448         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1449         sgpr_offset = total_size;
1450         total_size += sizeof(sgpr_init_compute_shader);
1451
1452         /* allocate an indirect buffer to put the commands in */
1453         memset(&ib, 0, sizeof(ib));
1454         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1455         if (r) {
1456                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1457                 return r;
1458         }
1459
1460         /* load the compute shaders */
1461         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1462                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1463
1464         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1465                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1466
1467         /* init the ib length to 0 */
1468         ib.length_dw = 0;
1469
1470         /* VGPR */
1471         /* write the register state for the compute dispatch */
1472         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1473                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1474                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1475                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1476         }
1477         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1478         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1479         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1480         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1481         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1482         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1483
1484         /* write dispatch packet */
1485         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1486         ib.ptr[ib.length_dw++] = 8; /* x */
1487         ib.ptr[ib.length_dw++] = 1; /* y */
1488         ib.ptr[ib.length_dw++] = 1; /* z */
1489         ib.ptr[ib.length_dw++] =
1490                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1491
1492         /* write CS partial flush packet */
1493         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1494         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1495
1496         /* SGPR1 */
1497         /* write the register state for the compute dispatch */
1498         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1499                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1500                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1501                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1502         }
1503         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1504         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1505         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1506         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1507         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1508         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1509
1510         /* write dispatch packet */
1511         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1512         ib.ptr[ib.length_dw++] = 8; /* x */
1513         ib.ptr[ib.length_dw++] = 1; /* y */
1514         ib.ptr[ib.length_dw++] = 1; /* z */
1515         ib.ptr[ib.length_dw++] =
1516                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1517
1518         /* write CS partial flush packet */
1519         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1520         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1521
1522         /* SGPR2 */
1523         /* write the register state for the compute dispatch */
1524         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1525                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1526                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1527                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1528         }
1529         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1530         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1531         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1532         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1533         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1534         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1535
1536         /* write dispatch packet */
1537         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1538         ib.ptr[ib.length_dw++] = 8; /* x */
1539         ib.ptr[ib.length_dw++] = 1; /* y */
1540         ib.ptr[ib.length_dw++] = 1; /* z */
1541         ib.ptr[ib.length_dw++] =
1542                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1543
1544         /* write CS partial flush packet */
1545         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1546         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1547
1548         /* shedule the ib on the ring */
1549         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1550         if (r) {
1551                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1552                 goto fail;
1553         }
1554
1555         /* wait for the GPU to finish processing the IB */
1556         r = fence_wait(f, false);
1557         if (r) {
1558                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1559                 goto fail;
1560         }
1561
1562         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1563         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1564         WREG32(mmGB_EDC_MODE, tmp);
1565
1566         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1567         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1568         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1569
1570
1571         /* read back registers to clear the counters */
1572         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1573                 RREG32(sec_ded_counter_registers[i]);
1574
1575 fail:
1576         fence_put(f);
1577         amdgpu_ib_free(adev, &ib, NULL);
1578         fence_put(f);
1579
1580         return r;
1581 }
1582
1583 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1584 {
1585         u32 gb_addr_config;
1586         u32 mc_shared_chmap, mc_arb_ramcfg;
1587         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1588         u32 tmp;
1589         int ret;
1590
1591         switch (adev->asic_type) {
1592         case CHIP_TOPAZ:
1593                 adev->gfx.config.max_shader_engines = 1;
1594                 adev->gfx.config.max_tile_pipes = 2;
1595                 adev->gfx.config.max_cu_per_sh = 6;
1596                 adev->gfx.config.max_sh_per_se = 1;
1597                 adev->gfx.config.max_backends_per_se = 2;
1598                 adev->gfx.config.max_texture_channel_caches = 2;
1599                 adev->gfx.config.max_gprs = 256;
1600                 adev->gfx.config.max_gs_threads = 32;
1601                 adev->gfx.config.max_hw_contexts = 8;
1602
1603                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1604                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1605                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1606                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1607                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1608                 break;
1609         case CHIP_FIJI:
1610                 adev->gfx.config.max_shader_engines = 4;
1611                 adev->gfx.config.max_tile_pipes = 16;
1612                 adev->gfx.config.max_cu_per_sh = 16;
1613                 adev->gfx.config.max_sh_per_se = 1;
1614                 adev->gfx.config.max_backends_per_se = 4;
1615                 adev->gfx.config.max_texture_channel_caches = 16;
1616                 adev->gfx.config.max_gprs = 256;
1617                 adev->gfx.config.max_gs_threads = 32;
1618                 adev->gfx.config.max_hw_contexts = 8;
1619
1620                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1621                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1622                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1623                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1624                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1625                 break;
1626         case CHIP_BAFFIN:
1627                 ret = amdgpu_atombios_get_gfx_info(adev);
1628                 if (ret)
1629                         return ret;
1630                 adev->gfx.config.max_gprs = 256;
1631                 adev->gfx.config.max_gs_threads = 32;
1632                 adev->gfx.config.max_hw_contexts = 8;
1633
1634                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1635                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1636                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1637                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1638                 gb_addr_config = BAFFIN_GB_ADDR_CONFIG_GOLDEN;
1639                 break;
1640         case CHIP_ELLESMERE:
1641                 ret = amdgpu_atombios_get_gfx_info(adev);
1642                 if (ret)
1643                         return ret;
1644                 adev->gfx.config.max_gprs = 256;
1645                 adev->gfx.config.max_gs_threads = 32;
1646                 adev->gfx.config.max_hw_contexts = 8;
1647
1648                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1649                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1650                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1651                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1652                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1653                 break;
1654         case CHIP_TONGA:
1655                 adev->gfx.config.max_shader_engines = 4;
1656                 adev->gfx.config.max_tile_pipes = 8;
1657                 adev->gfx.config.max_cu_per_sh = 8;
1658                 adev->gfx.config.max_sh_per_se = 1;
1659                 adev->gfx.config.max_backends_per_se = 2;
1660                 adev->gfx.config.max_texture_channel_caches = 8;
1661                 adev->gfx.config.max_gprs = 256;
1662                 adev->gfx.config.max_gs_threads = 32;
1663                 adev->gfx.config.max_hw_contexts = 8;
1664
1665                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1666                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1667                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1668                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1669                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1670                 break;
1671         case CHIP_CARRIZO:
1672                 adev->gfx.config.max_shader_engines = 1;
1673                 adev->gfx.config.max_tile_pipes = 2;
1674                 adev->gfx.config.max_sh_per_se = 1;
1675                 adev->gfx.config.max_backends_per_se = 2;
1676
1677                 switch (adev->pdev->revision) {
1678                 case 0xc4:
1679                 case 0x84:
1680                 case 0xc8:
1681                 case 0xcc:
1682                 case 0xe1:
1683                 case 0xe3:
1684                         /* B10 */
1685                         adev->gfx.config.max_cu_per_sh = 8;
1686                         break;
1687                 case 0xc5:
1688                 case 0x81:
1689                 case 0x85:
1690                 case 0xc9:
1691                 case 0xcd:
1692                 case 0xe2:
1693                 case 0xe4:
1694                         /* B8 */
1695                         adev->gfx.config.max_cu_per_sh = 6;
1696                         break;
1697                 case 0xc6:
1698                 case 0xca:
1699                 case 0xce:
1700                 case 0x88:
1701                         /* B6 */
1702                         adev->gfx.config.max_cu_per_sh = 6;
1703                         break;
1704                 case 0xc7:
1705                 case 0x87:
1706                 case 0xcb:
1707                 case 0xe5:
1708                 case 0x89:
1709                 default:
1710                         /* B4 */
1711                         adev->gfx.config.max_cu_per_sh = 4;
1712                         break;
1713                 }
1714
1715                 adev->gfx.config.max_texture_channel_caches = 2;
1716                 adev->gfx.config.max_gprs = 256;
1717                 adev->gfx.config.max_gs_threads = 32;
1718                 adev->gfx.config.max_hw_contexts = 8;
1719
1720                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1721                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1722                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1723                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1724                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1725                 break;
1726         case CHIP_STONEY:
1727                 adev->gfx.config.max_shader_engines = 1;
1728                 adev->gfx.config.max_tile_pipes = 2;
1729                 adev->gfx.config.max_sh_per_se = 1;
1730                 adev->gfx.config.max_backends_per_se = 1;
1731
1732                 switch (adev->pdev->revision) {
1733                 case 0xc0:
1734                 case 0xc1:
1735                 case 0xc2:
1736                 case 0xc4:
1737                 case 0xc8:
1738                 case 0xc9:
1739                         adev->gfx.config.max_cu_per_sh = 3;
1740                         break;
1741                 case 0xd0:
1742                 case 0xd1:
1743                 case 0xd2:
1744                 default:
1745                         adev->gfx.config.max_cu_per_sh = 2;
1746                         break;
1747                 }
1748
1749                 adev->gfx.config.max_texture_channel_caches = 2;
1750                 adev->gfx.config.max_gprs = 256;
1751                 adev->gfx.config.max_gs_threads = 16;
1752                 adev->gfx.config.max_hw_contexts = 8;
1753
1754                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1759                 break;
1760         default:
1761                 adev->gfx.config.max_shader_engines = 2;
1762                 adev->gfx.config.max_tile_pipes = 4;
1763                 adev->gfx.config.max_cu_per_sh = 2;
1764                 adev->gfx.config.max_sh_per_se = 1;
1765                 adev->gfx.config.max_backends_per_se = 2;
1766                 adev->gfx.config.max_texture_channel_caches = 4;
1767                 adev->gfx.config.max_gprs = 256;
1768                 adev->gfx.config.max_gs_threads = 32;
1769                 adev->gfx.config.max_hw_contexts = 8;
1770
1771                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1776                 break;
1777         }
1778
1779         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1780         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1781         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1782
1783         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1784         adev->gfx.config.mem_max_burst_length_bytes = 256;
1785         if (adev->flags & AMD_IS_APU) {
1786                 /* Get memory bank mapping mode. */
1787                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1788                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1789                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1790
1791                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1792                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1793                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1794
1795                 /* Validate settings in case only one DIMM installed. */
1796                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1797                         dimm00_addr_map = 0;
1798                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1799                         dimm01_addr_map = 0;
1800                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1801                         dimm10_addr_map = 0;
1802                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1803                         dimm11_addr_map = 0;
1804
1805                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1806                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1807                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1808                         adev->gfx.config.mem_row_size_in_kb = 2;
1809                 else
1810                         adev->gfx.config.mem_row_size_in_kb = 1;
1811         } else {
1812                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1813                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1814                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1815                         adev->gfx.config.mem_row_size_in_kb = 4;
1816         }
1817
1818         adev->gfx.config.shader_engine_tile_size = 32;
1819         adev->gfx.config.num_gpus = 1;
1820         adev->gfx.config.multi_gpu_tile_size = 64;
1821
1822         /* fix up row size */
1823         switch (adev->gfx.config.mem_row_size_in_kb) {
1824         case 1:
1825         default:
1826                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1827                 break;
1828         case 2:
1829                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1830                 break;
1831         case 4:
1832                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1833                 break;
1834         }
1835         adev->gfx.config.gb_addr_config = gb_addr_config;
1836
1837         return 0;
1838 }
1839
1840 static int gfx_v8_0_sw_init(void *handle)
1841 {
1842         int i, r;
1843         struct amdgpu_ring *ring;
1844         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1845
1846         /* EOP Event */
1847         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1848         if (r)
1849                 return r;
1850
1851         /* Privileged reg */
1852         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1853         if (r)
1854                 return r;
1855
1856         /* Privileged inst */
1857         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1858         if (r)
1859                 return r;
1860
1861         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1862
1863         gfx_v8_0_scratch_init(adev);
1864
1865         r = gfx_v8_0_init_microcode(adev);
1866         if (r) {
1867                 DRM_ERROR("Failed to load gfx firmware!\n");
1868                 return r;
1869         }
1870
1871         r = gfx_v8_0_rlc_init(adev);
1872         if (r) {
1873                 DRM_ERROR("Failed to init rlc BOs!\n");
1874                 return r;
1875         }
1876
1877         r = gfx_v8_0_mec_init(adev);
1878         if (r) {
1879                 DRM_ERROR("Failed to init MEC BOs!\n");
1880                 return r;
1881         }
1882
1883         /* set up the gfx ring */
1884         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1885                 ring = &adev->gfx.gfx_ring[i];
1886                 ring->ring_obj = NULL;
1887                 sprintf(ring->name, "gfx");
1888                 /* no gfx doorbells on iceland */
1889                 if (adev->asic_type != CHIP_TOPAZ) {
1890                         ring->use_doorbell = true;
1891                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1892                 }
1893
1894                 r = amdgpu_ring_init(adev, ring, 1024,
1895                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1896                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1897                                      AMDGPU_RING_TYPE_GFX);
1898                 if (r)
1899                         return r;
1900         }
1901
1902         /* set up the compute queues */
1903         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1904                 unsigned irq_type;
1905
1906                 /* max 32 queues per MEC */
1907                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1908                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1909                         break;
1910                 }
1911                 ring = &adev->gfx.compute_ring[i];
1912                 ring->ring_obj = NULL;
1913                 ring->use_doorbell = true;
1914                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1915                 ring->me = 1; /* first MEC */
1916                 ring->pipe = i / 8;
1917                 ring->queue = i % 8;
1918                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1919                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1920                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1921                 r = amdgpu_ring_init(adev, ring, 1024,
1922                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1923                                      &adev->gfx.eop_irq, irq_type,
1924                                      AMDGPU_RING_TYPE_COMPUTE);
1925                 if (r)
1926                         return r;
1927         }
1928
1929         /* reserve GDS, GWS and OA resource for gfx */
1930         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1931                         PAGE_SIZE, true,
1932                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1933                         NULL, &adev->gds.gds_gfx_bo);
1934         if (r)
1935                 return r;
1936
1937         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1938                 PAGE_SIZE, true,
1939                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1940                 NULL, &adev->gds.gws_gfx_bo);
1941         if (r)
1942                 return r;
1943
1944         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1945                         PAGE_SIZE, true,
1946                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1947                         NULL, &adev->gds.oa_gfx_bo);
1948         if (r)
1949                 return r;
1950
1951         adev->gfx.ce_ram_size = 0x8000;
1952
1953         r = gfx_v8_0_gpu_early_init(adev);
1954         if (r)
1955                 return r;
1956
1957         return 0;
1958 }
1959
1960 static int gfx_v8_0_sw_fini(void *handle)
1961 {
1962         int i;
1963         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1964
1965         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1966         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1967         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1968
1969         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1970                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1971         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1972                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1973
1974         gfx_v8_0_mec_fini(adev);
1975
1976         gfx_v8_0_rlc_fini(adev);
1977
1978         kfree(adev->gfx.rlc.register_list_format);
1979
1980         return 0;
1981 }
1982
1983 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1984 {
1985         uint32_t *modearray, *mod2array;
1986         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1987         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1988         u32 reg_offset;
1989
1990         modearray = adev->gfx.config.tile_mode_array;
1991         mod2array = adev->gfx.config.macrotile_mode_array;
1992
1993         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1994                 modearray[reg_offset] = 0;
1995
1996         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1997                 mod2array[reg_offset] = 0;
1998
1999         switch (adev->asic_type) {
2000         case CHIP_TOPAZ:
2001                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2002                                 PIPE_CONFIG(ADDR_SURF_P2) |
2003                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2005                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2006                                 PIPE_CONFIG(ADDR_SURF_P2) |
2007                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2009                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010                                 PIPE_CONFIG(ADDR_SURF_P2) |
2011                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2013                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014                                 PIPE_CONFIG(ADDR_SURF_P2) |
2015                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2017                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018                                 PIPE_CONFIG(ADDR_SURF_P2) |
2019                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2021                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022                                 PIPE_CONFIG(ADDR_SURF_P2) |
2023                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2025                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2026                                 PIPE_CONFIG(ADDR_SURF_P2) |
2027                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2028                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2029                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2030                                 PIPE_CONFIG(ADDR_SURF_P2));
2031                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2032                                 PIPE_CONFIG(ADDR_SURF_P2) |
2033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2034                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2035                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036                                  PIPE_CONFIG(ADDR_SURF_P2) |
2037                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2038                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2039                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040                                  PIPE_CONFIG(ADDR_SURF_P2) |
2041                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2042                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2043                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2044                                  PIPE_CONFIG(ADDR_SURF_P2) |
2045                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2046                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2047                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                  PIPE_CONFIG(ADDR_SURF_P2) |
2049                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2050                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2052                                  PIPE_CONFIG(ADDR_SURF_P2) |
2053                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2054                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2056                                  PIPE_CONFIG(ADDR_SURF_P2) |
2057                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2058                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2059                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2060                                  PIPE_CONFIG(ADDR_SURF_P2) |
2061                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2062                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2063                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2064                                  PIPE_CONFIG(ADDR_SURF_P2) |
2065                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2066                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2067                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2068                                  PIPE_CONFIG(ADDR_SURF_P2) |
2069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2071                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2072                                  PIPE_CONFIG(ADDR_SURF_P2) |
2073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2075                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2076                                  PIPE_CONFIG(ADDR_SURF_P2) |
2077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2079                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2080                                  PIPE_CONFIG(ADDR_SURF_P2) |
2081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2083                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2084                                  PIPE_CONFIG(ADDR_SURF_P2) |
2085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2087                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2088                                  PIPE_CONFIG(ADDR_SURF_P2) |
2089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2091                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2092                                  PIPE_CONFIG(ADDR_SURF_P2) |
2093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2095                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096                                  PIPE_CONFIG(ADDR_SURF_P2) |
2097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2100                                  PIPE_CONFIG(ADDR_SURF_P2) |
2101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2103
2104                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2105                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2106                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2107                                 NUM_BANKS(ADDR_SURF_8_BANK));
2108                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2109                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2110                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2111                                 NUM_BANKS(ADDR_SURF_8_BANK));
2112                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2113                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2114                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2115                                 NUM_BANKS(ADDR_SURF_8_BANK));
2116                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2117                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2118                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2119                                 NUM_BANKS(ADDR_SURF_8_BANK));
2120                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2123                                 NUM_BANKS(ADDR_SURF_8_BANK));
2124                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2125                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2126                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2127                                 NUM_BANKS(ADDR_SURF_8_BANK));
2128                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131                                 NUM_BANKS(ADDR_SURF_8_BANK));
2132                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2135                                 NUM_BANKS(ADDR_SURF_16_BANK));
2136                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2139                                 NUM_BANKS(ADDR_SURF_16_BANK));
2140                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2141                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2143                                  NUM_BANKS(ADDR_SURF_16_BANK));
2144                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2145                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2146                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2147                                  NUM_BANKS(ADDR_SURF_16_BANK));
2148                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2150                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151                                  NUM_BANKS(ADDR_SURF_16_BANK));
2152                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2154                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155                                  NUM_BANKS(ADDR_SURF_16_BANK));
2156                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159                                  NUM_BANKS(ADDR_SURF_8_BANK));
2160
2161                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2162                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2163                             reg_offset != 23)
2164                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2165
2166                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2167                         if (reg_offset != 7)
2168                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2169
2170                 break;
2171         case CHIP_FIJI:
2172                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2188                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2192                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2196                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2197                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2198                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2199                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2200                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2201                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2202                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2203                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2205                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2206                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2207                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2209                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2210                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2214                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2215                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2218                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2219                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2222                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2223                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2226                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2227                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2231                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2235                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2242                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2243                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2246                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2247                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2250                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2251                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2254                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2258                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2262                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2263                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2266                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2267                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2270                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2271                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2290                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2291                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2294
2295                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2298                                 NUM_BANKS(ADDR_SURF_8_BANK));
2299                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2301                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2302                                 NUM_BANKS(ADDR_SURF_8_BANK));
2303                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                 NUM_BANKS(ADDR_SURF_8_BANK));
2307                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310                                 NUM_BANKS(ADDR_SURF_8_BANK));
2311                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2313                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2314                                 NUM_BANKS(ADDR_SURF_8_BANK));
2315                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2318                                 NUM_BANKS(ADDR_SURF_8_BANK));
2319                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2320                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2321                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2322                                 NUM_BANKS(ADDR_SURF_8_BANK));
2323                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2326                                 NUM_BANKS(ADDR_SURF_8_BANK));
2327                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2334                                  NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2337                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2338                                  NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                  NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                  NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350                                  NUM_BANKS(ADDR_SURF_4_BANK));
2351
2352                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2353                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2354
2355                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2356                         if (reg_offset != 7)
2357                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2358
2359                 break;
2360         case CHIP_TONGA:
2361                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2363                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2364                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2367                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2368                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2369                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2371                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2372                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2373                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2375                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2376                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2377                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2379                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2381                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2382                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2383                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2385                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2387                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2394                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2395                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2404                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2407                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2411                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2419                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2420                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2427                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2431                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2432                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2435                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2436                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2439                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2440                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2443                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2447                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2451                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2452                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2455                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2456                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2459                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2471                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2475                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2479                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2480                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2483
2484                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2486                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2487                                 NUM_BANKS(ADDR_SURF_16_BANK));
2488                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2491                                 NUM_BANKS(ADDR_SURF_16_BANK));
2492                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2495                                 NUM_BANKS(ADDR_SURF_16_BANK));
2496                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499                                 NUM_BANKS(ADDR_SURF_16_BANK));
2500                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2503                                 NUM_BANKS(ADDR_SURF_16_BANK));
2504                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                 NUM_BANKS(ADDR_SURF_16_BANK));
2508                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                                 NUM_BANKS(ADDR_SURF_16_BANK));
2512                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2515                                 NUM_BANKS(ADDR_SURF_16_BANK));
2516                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2522                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2527                                  NUM_BANKS(ADDR_SURF_16_BANK));
2528                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2531                                  NUM_BANKS(ADDR_SURF_8_BANK));
2532                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                  NUM_BANKS(ADDR_SURF_4_BANK));
2536                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                  NUM_BANKS(ADDR_SURF_4_BANK));
2540
2541                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2542                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2543
2544                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2545                         if (reg_offset != 7)
2546                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2547
2548                 break;
2549         case CHIP_BAFFIN:
2550                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2553                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2554                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2555                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2557                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2558                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2560                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2561                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2562                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2565                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2566                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2568                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2569                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2570                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2573                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2574                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2577                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2578                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2582                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2583                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2584                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2585                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2586                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2588                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2590                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2591                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2596                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2597                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2600                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2603                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2607                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2608                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2609                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2611                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2615                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2616                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2620                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2621                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2623                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2624                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2625                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2627                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2628                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2629                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2631                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2632                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2633                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2635                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2636                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2637                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2639                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2640                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2641                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2643                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2644                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2645                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2648                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2649                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2651                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2652                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2653                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2655                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2657                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2659                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2660                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2663                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2664                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2667                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2668                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2671                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2672
2673                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2676                                 NUM_BANKS(ADDR_SURF_16_BANK));
2677
2678                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2680                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2681                                 NUM_BANKS(ADDR_SURF_16_BANK));
2682
2683                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2685                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2686                                 NUM_BANKS(ADDR_SURF_16_BANK));
2687
2688                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2690                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2691                                 NUM_BANKS(ADDR_SURF_16_BANK));
2692
2693                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2695                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2696                                 NUM_BANKS(ADDR_SURF_16_BANK));
2697
2698                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2700                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2701                                 NUM_BANKS(ADDR_SURF_16_BANK));
2702
2703                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2706                                 NUM_BANKS(ADDR_SURF_16_BANK));
2707
2708                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2709                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2710                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2711                                 NUM_BANKS(ADDR_SURF_16_BANK));
2712
2713                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2714                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2715                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                 NUM_BANKS(ADDR_SURF_16_BANK));
2717
2718                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721                                 NUM_BANKS(ADDR_SURF_16_BANK));
2722
2723                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727
2728                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731                                 NUM_BANKS(ADDR_SURF_16_BANK));
2732
2733                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2736                                 NUM_BANKS(ADDR_SURF_8_BANK));
2737
2738                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741                                 NUM_BANKS(ADDR_SURF_4_BANK));
2742
2743                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2744                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2745
2746                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2747                         if (reg_offset != 7)
2748                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2749
2750                 break;
2751         case CHIP_ELLESMERE:
2752                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2754                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2758                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2762                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2764                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2766                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2768                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2770                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2772                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2774                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2776                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2778                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2780                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2785                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2786                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2787                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2789                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2790                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2793                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2798                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2801                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2802                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2818                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2822                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2823                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2826                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2827                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2830                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2831                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2834                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2838                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2842                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2843                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2846                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2850                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2866                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2870                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2874
2875                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2877                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2878                                 NUM_BANKS(ADDR_SURF_16_BANK));
2879
2880                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883                                 NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2887                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888                                 NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893                                 NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2897                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2898                                 NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2903                                 NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2908                                 NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2912                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913                                 NUM_BANKS(ADDR_SURF_16_BANK));
2914
2915                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                 NUM_BANKS(ADDR_SURF_16_BANK));
2919
2920                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2923                                 NUM_BANKS(ADDR_SURF_16_BANK));
2924
2925                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2927                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2928                                 NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2932                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2933                                 NUM_BANKS(ADDR_SURF_8_BANK));
2934
2935                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2938                                 NUM_BANKS(ADDR_SURF_4_BANK));
2939
2940                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2942                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2943                                 NUM_BANKS(ADDR_SURF_4_BANK));
2944
2945                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2946                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2947
2948                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2949                         if (reg_offset != 7)
2950                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2951
2952                 break;
2953         case CHIP_STONEY:
2954                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P2) |
2956                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2958                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P2) |
2960                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2962                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P2) |
2964                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2966                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P2) |
2968                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2970                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971                                 PIPE_CONFIG(ADDR_SURF_P2) |
2972                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2974                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2975                                 PIPE_CONFIG(ADDR_SURF_P2) |
2976                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2978                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P2) |
2980                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2982                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2983                                 PIPE_CONFIG(ADDR_SURF_P2));
2984                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2985                                 PIPE_CONFIG(ADDR_SURF_P2) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2987                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2988                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2989                                  PIPE_CONFIG(ADDR_SURF_P2) |
2990                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2991                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2993                                  PIPE_CONFIG(ADDR_SURF_P2) |
2994                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2995                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2996                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2997                                  PIPE_CONFIG(ADDR_SURF_P2) |
2998                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2999                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3000                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3001                                  PIPE_CONFIG(ADDR_SURF_P2) |
3002                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3003                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3005                                  PIPE_CONFIG(ADDR_SURF_P2) |
3006                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3009                                  PIPE_CONFIG(ADDR_SURF_P2) |
3010                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3011                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3012                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3013                                  PIPE_CONFIG(ADDR_SURF_P2) |
3014                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3015                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3016                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3017                                  PIPE_CONFIG(ADDR_SURF_P2) |
3018                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3019                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3020                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3021                                  PIPE_CONFIG(ADDR_SURF_P2) |
3022                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3023                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3024                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3025                                  PIPE_CONFIG(ADDR_SURF_P2) |
3026                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3027                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3028                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3029                                  PIPE_CONFIG(ADDR_SURF_P2) |
3030                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3031                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3032                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3033                                  PIPE_CONFIG(ADDR_SURF_P2) |
3034                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3036                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3037                                  PIPE_CONFIG(ADDR_SURF_P2) |
3038                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3039                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3040                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3041                                  PIPE_CONFIG(ADDR_SURF_P2) |
3042                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3043                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3045                                  PIPE_CONFIG(ADDR_SURF_P2) |
3046                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3047                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3048                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3049                                  PIPE_CONFIG(ADDR_SURF_P2) |
3050                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3051                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3052                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3053                                  PIPE_CONFIG(ADDR_SURF_P2) |
3054                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3056
3057                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3059                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3060                                 NUM_BANKS(ADDR_SURF_8_BANK));
3061                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3063                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064                                 NUM_BANKS(ADDR_SURF_8_BANK));
3065                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3068                                 NUM_BANKS(ADDR_SURF_8_BANK));
3069                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_8_BANK));
3073                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_8_BANK));
3077                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3080                                 NUM_BANKS(ADDR_SURF_8_BANK));
3081                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084                                 NUM_BANKS(ADDR_SURF_8_BANK));
3085                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3086                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3087                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3088                                 NUM_BANKS(ADDR_SURF_16_BANK));
3089                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3092                                 NUM_BANKS(ADDR_SURF_16_BANK));
3093                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3094                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3095                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096                                  NUM_BANKS(ADDR_SURF_16_BANK));
3097                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3098                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3099                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100                                  NUM_BANKS(ADDR_SURF_16_BANK));
3101                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3103                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3104                                  NUM_BANKS(ADDR_SURF_16_BANK));
3105                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3108                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3111                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3112                                  NUM_BANKS(ADDR_SURF_8_BANK));
3113
3114                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3115                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3116                             reg_offset != 23)
3117                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3118
3119                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3120                         if (reg_offset != 7)
3121                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3122
3123                 break;
3124         default:
3125                 dev_warn(adev->dev,
3126                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3127                          adev->asic_type);
3128
3129         case CHIP_CARRIZO:
3130                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2) |
3132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3134                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                 PIPE_CONFIG(ADDR_SURF_P2) |
3136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3138                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3139                                 PIPE_CONFIG(ADDR_SURF_P2) |
3140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3142                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3143                                 PIPE_CONFIG(ADDR_SURF_P2) |
3144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3146                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147                                 PIPE_CONFIG(ADDR_SURF_P2) |
3148                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3151                                 PIPE_CONFIG(ADDR_SURF_P2) |
3152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3154                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3155                                 PIPE_CONFIG(ADDR_SURF_P2) |
3156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3158                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3159                                 PIPE_CONFIG(ADDR_SURF_P2));
3160                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3161                                 PIPE_CONFIG(ADDR_SURF_P2) |
3162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3163                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3164                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3168                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3172                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3180                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3184                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3188                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3196                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3200                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3204                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3205                                  PIPE_CONFIG(ADDR_SURF_P2) |
3206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3208                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3209                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3212                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3213                                  PIPE_CONFIG(ADDR_SURF_P2) |
3214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3216                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3217                                  PIPE_CONFIG(ADDR_SURF_P2) |
3218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3220                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3221                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3224                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3229                                  PIPE_CONFIG(ADDR_SURF_P2) |
3230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3232
3233                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_8_BANK));
3237                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                 NUM_BANKS(ADDR_SURF_8_BANK));
3241                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3244                                 NUM_BANKS(ADDR_SURF_8_BANK));
3245                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3248                                 NUM_BANKS(ADDR_SURF_8_BANK));
3249                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3252                                 NUM_BANKS(ADDR_SURF_8_BANK));
3253                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256                                 NUM_BANKS(ADDR_SURF_8_BANK));
3257                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260                                 NUM_BANKS(ADDR_SURF_8_BANK));
3261                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3264                                 NUM_BANKS(ADDR_SURF_16_BANK));
3265                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3268                                 NUM_BANKS(ADDR_SURF_16_BANK));
3269                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3270                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3271                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                  NUM_BANKS(ADDR_SURF_16_BANK));
3273                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3274                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3275                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3276                                  NUM_BANKS(ADDR_SURF_16_BANK));
3277                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3279                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3280                                  NUM_BANKS(ADDR_SURF_16_BANK));
3281                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3284                                  NUM_BANKS(ADDR_SURF_16_BANK));
3285                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3287                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3288                                  NUM_BANKS(ADDR_SURF_8_BANK));
3289
3290                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3291                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3292                             reg_offset != 23)
3293                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3294
3295                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3296                         if (reg_offset != 7)
3297                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3298
3299                 break;
3300         }
3301 }
3302
3303 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3304 {
3305         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3306
3307         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3308                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3309                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3310         } else if (se_num == 0xffffffff) {
3311                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3312                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3313         } else if (sh_num == 0xffffffff) {
3314                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3315                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3316         } else {
3317                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3318                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3319         }
3320         WREG32(mmGRBM_GFX_INDEX, data);
3321 }
3322
3323 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3324 {
3325         return (u32)((1ULL << bit_width) - 1);
3326 }
3327
3328 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3329 {
3330         u32 data, mask;
3331
3332         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3333         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3334
3335         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3336         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3337
3338         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3339                                        adev->gfx.config.max_sh_per_se);
3340
3341         return (~data) & mask;
3342 }
3343
3344 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3345 {
3346         int i, j;
3347         u32 data;
3348         u32 active_rbs = 0;
3349         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3350                                         adev->gfx.config.max_sh_per_se;
3351
3352         mutex_lock(&adev->grbm_idx_mutex);
3353         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3354                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3355                         gfx_v8_0_select_se_sh(adev, i, j);
3356                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3357                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3358                                                rb_bitmap_width_per_sh);
3359                 }
3360         }
3361         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3362         mutex_unlock(&adev->grbm_idx_mutex);
3363
3364         adev->gfx.config.backend_enable_mask = active_rbs;
3365         adev->gfx.config.num_rbs = hweight32(active_rbs);
3366 }
3367
3368 /**
3369  * gfx_v8_0_init_compute_vmid - gart enable
3370  *
3371  * @rdev: amdgpu_device pointer
3372  *
3373  * Initialize compute vmid sh_mem registers
3374  *
3375  */
3376 #define DEFAULT_SH_MEM_BASES    (0x6000)
3377 #define FIRST_COMPUTE_VMID      (8)
3378 #define LAST_COMPUTE_VMID       (16)
3379 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3380 {
3381         int i;
3382         uint32_t sh_mem_config;
3383         uint32_t sh_mem_bases;
3384
3385         /*
3386          * Configure apertures:
3387          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3388          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3389          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3390          */
3391         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3392
3393         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3394                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3395                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3396                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3397                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3398                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3399
3400         mutex_lock(&adev->srbm_mutex);
3401         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3402                 vi_srbm_select(adev, 0, 0, 0, i);
3403                 /* CP and shaders */
3404                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3405                 WREG32(mmSH_MEM_APE1_BASE, 1);
3406                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3407                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3408         }
3409         vi_srbm_select(adev, 0, 0, 0, 0);
3410         mutex_unlock(&adev->srbm_mutex);
3411 }
3412
3413 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3414 {
3415         u32 tmp;
3416         int i;
3417
3418         tmp = RREG32(mmGRBM_CNTL);
3419         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3420         WREG32(mmGRBM_CNTL, tmp);
3421
3422         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3423         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3424         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3425
3426         gfx_v8_0_tiling_mode_table_init(adev);
3427
3428         gfx_v8_0_setup_rb(adev);
3429
3430         /* XXX SH_MEM regs */
3431         /* where to put LDS, scratch, GPUVM in FSA64 space */
3432         mutex_lock(&adev->srbm_mutex);
3433         for (i = 0; i < 16; i++) {
3434                 vi_srbm_select(adev, 0, 0, 0, i);
3435                 /* CP and shaders */
3436                 if (i == 0) {
3437                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3438                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3439                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3440                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3441                         WREG32(mmSH_MEM_CONFIG, tmp);
3442                 } else {
3443                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3444                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3445                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3446                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3447                         WREG32(mmSH_MEM_CONFIG, tmp);
3448                 }
3449
3450                 WREG32(mmSH_MEM_APE1_BASE, 1);
3451                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3452                 WREG32(mmSH_MEM_BASES, 0);
3453         }
3454         vi_srbm_select(adev, 0, 0, 0, 0);
3455         mutex_unlock(&adev->srbm_mutex);
3456
3457         gfx_v8_0_init_compute_vmid(adev);
3458
3459         mutex_lock(&adev->grbm_idx_mutex);
3460         /*
3461          * making sure that the following register writes will be broadcasted
3462          * to all the shaders
3463          */
3464         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3465
3466         WREG32(mmPA_SC_FIFO_SIZE,
3467                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3468                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3469                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3470                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3471                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3472                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3473                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3474                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3475         mutex_unlock(&adev->grbm_idx_mutex);
3476
3477 }
3478
3479 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3480 {
3481         u32 i, j, k;
3482         u32 mask;
3483
3484         mutex_lock(&adev->grbm_idx_mutex);
3485         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3486                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3487                         gfx_v8_0_select_se_sh(adev, i, j);
3488                         for (k = 0; k < adev->usec_timeout; k++) {
3489                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3490                                         break;
3491                                 udelay(1);
3492                         }
3493                 }
3494         }
3495         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3496         mutex_unlock(&adev->grbm_idx_mutex);
3497
3498         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3499                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3500                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3501                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3502         for (k = 0; k < adev->usec_timeout; k++) {
3503                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3504                         break;
3505                 udelay(1);
3506         }
3507 }
3508
3509 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3510                                                bool enable)
3511 {
3512         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3513
3514         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3515         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3516         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3517         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3518
3519         WREG32(mmCP_INT_CNTL_RING0, tmp);
3520 }
3521
3522 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3523 {
3524         /* csib */
3525         WREG32(mmRLC_CSIB_ADDR_HI,
3526                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3527         WREG32(mmRLC_CSIB_ADDR_LO,
3528                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3529         WREG32(mmRLC_CSIB_LENGTH,
3530                         adev->gfx.rlc.clear_state_size);
3531 }
3532
3533 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3534                                 int ind_offset,
3535                                 int list_size,
3536                                 int *unique_indices,
3537                                 int *indices_count,
3538                                 int max_indices,
3539                                 int *ind_start_offsets,
3540                                 int *offset_count,
3541                                 int max_offset)
3542 {
3543         int indices;
3544         bool new_entry = true;
3545
3546         for (; ind_offset < list_size; ind_offset++) {
3547
3548                 if (new_entry) {
3549                         new_entry = false;
3550                         ind_start_offsets[*offset_count] = ind_offset;
3551                         *offset_count = *offset_count + 1;
3552                         BUG_ON(*offset_count >= max_offset);
3553                 }
3554
3555                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3556                         new_entry = true;
3557                         continue;
3558                 }
3559
3560                 ind_offset += 2;
3561
3562                 /* look for the matching indice */
3563                 for (indices = 0;
3564                         indices < *indices_count;
3565                         indices++) {
3566                         if (unique_indices[indices] ==
3567                                 register_list_format[ind_offset])
3568                                 break;
3569                 }
3570
3571                 if (indices >= *indices_count) {
3572                         unique_indices[*indices_count] =
3573                                 register_list_format[ind_offset];
3574                         indices = *indices_count;
3575                         *indices_count = *indices_count + 1;
3576                         BUG_ON(*indices_count >= max_indices);
3577                 }
3578
3579                 register_list_format[ind_offset] = indices;
3580         }
3581 }
3582
3583 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3584 {
3585         int i, temp, data;
3586         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3587         int indices_count = 0;
3588         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3589         int offset_count = 0;
3590
3591         int list_size;
3592         unsigned int *register_list_format =
3593                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3594         if (register_list_format == NULL)
3595                 return -ENOMEM;
3596         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3597                         adev->gfx.rlc.reg_list_format_size_bytes);
3598
3599         gfx_v8_0_parse_ind_reg_list(register_list_format,
3600                                 RLC_FormatDirectRegListLength,
3601                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3602                                 unique_indices,
3603                                 &indices_count,
3604                                 sizeof(unique_indices) / sizeof(int),
3605                                 indirect_start_offsets,
3606                                 &offset_count,
3607                                 sizeof(indirect_start_offsets)/sizeof(int));
3608
3609         /* save and restore list */
3610         temp = RREG32(mmRLC_SRM_CNTL);
3611         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3612         WREG32(mmRLC_SRM_CNTL, temp);
3613
3614         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3615         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3616                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3617
3618         /* indirect list */
3619         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3620         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3621                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3622
3623         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3624         list_size = list_size >> 1;
3625         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3626         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3627
3628         /* starting offsets starts */
3629         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3630                 adev->gfx.rlc.starting_offsets_start);
3631         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3632                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3633                                 indirect_start_offsets[i]);
3634
3635         /* unique indices */
3636         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3637         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3638         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3639                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3640                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3641         }
3642         kfree(register_list_format);
3643
3644         return 0;
3645 }
3646
3647 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3648 {
3649         uint32_t data;
3650
3651         data = RREG32(mmRLC_SRM_CNTL);
3652         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3653         WREG32(mmRLC_SRM_CNTL, data);
3654 }
3655
3656 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3657 {
3658         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3659                               AMD_PG_SUPPORT_GFX_SMG |
3660                               AMD_PG_SUPPORT_GFX_DMG |
3661                               AMD_PG_SUPPORT_CP |
3662                               AMD_PG_SUPPORT_GDS |
3663                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3664                 gfx_v8_0_init_csb(adev);
3665                 gfx_v8_0_init_save_restore_list(adev);
3666                 gfx_v8_0_enable_save_restore_machine(adev);
3667         }
3668 }
3669
3670 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3671 {
3672         u32 tmp = RREG32(mmRLC_CNTL);
3673
3674         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3675         WREG32(mmRLC_CNTL, tmp);
3676
3677         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3678
3679         gfx_v8_0_wait_for_rlc_serdes(adev);
3680 }
3681
3682 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3683 {
3684         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3685
3686         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3687         WREG32(mmGRBM_SOFT_RESET, tmp);
3688         udelay(50);
3689         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3690         WREG32(mmGRBM_SOFT_RESET, tmp);
3691         udelay(50);
3692 }
3693
3694 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3695 {
3696         u32 tmp = RREG32(mmRLC_CNTL);
3697
3698         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3699         WREG32(mmRLC_CNTL, tmp);
3700
3701         /* carrizo do enable cp interrupt after cp inited */
3702         if (!(adev->flags & AMD_IS_APU))
3703                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3704
3705         udelay(50);
3706 }
3707
3708 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3709 {
3710         const struct rlc_firmware_header_v2_0 *hdr;
3711         const __le32 *fw_data;
3712         unsigned i, fw_size;
3713
3714         if (!adev->gfx.rlc_fw)
3715                 return -EINVAL;
3716
3717         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3718         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3719
3720         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3721                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3722         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3723
3724         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3725         for (i = 0; i < fw_size; i++)
3726                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3727         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3728
3729         return 0;
3730 }
3731
3732 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3733 {
3734         int r;
3735
3736         gfx_v8_0_rlc_stop(adev);
3737
3738         /* disable CG */
3739         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3740         if (adev->asic_type == CHIP_BAFFIN ||
3741                 adev->asic_type == CHIP_ELLESMERE)
3742                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3743
3744         /* disable PG */
3745         WREG32(mmRLC_PG_CNTL, 0);
3746
3747         gfx_v8_0_rlc_reset(adev);
3748
3749         gfx_v8_0_init_pg(adev);
3750
3751         if (!adev->pp_enabled) {
3752                 if (!adev->firmware.smu_load) {
3753                         /* legacy rlc firmware loading */
3754                         r = gfx_v8_0_rlc_load_microcode(adev);
3755                         if (r)
3756                                 return r;
3757                 } else {
3758                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3759                                                         AMDGPU_UCODE_ID_RLC_G);
3760                         if (r)
3761                                 return -EINVAL;
3762                 }
3763         }
3764
3765         gfx_v8_0_rlc_start(adev);
3766
3767         return 0;
3768 }
3769
3770 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3771 {
3772         int i;
3773         u32 tmp = RREG32(mmCP_ME_CNTL);
3774
3775         if (enable) {
3776                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3777                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3778                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3779         } else {
3780                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3781                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3782                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3783                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3784                         adev->gfx.gfx_ring[i].ready = false;
3785         }
3786         WREG32(mmCP_ME_CNTL, tmp);
3787         udelay(50);
3788 }
3789
3790 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3791 {
3792         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3793         const struct gfx_firmware_header_v1_0 *ce_hdr;
3794         const struct gfx_firmware_header_v1_0 *me_hdr;
3795         const __le32 *fw_data;
3796         unsigned i, fw_size;
3797
3798         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3799                 return -EINVAL;
3800
3801         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3802                 adev->gfx.pfp_fw->data;
3803         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3804                 adev->gfx.ce_fw->data;
3805         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3806                 adev->gfx.me_fw->data;
3807
3808         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3809         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3810         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3811
3812         gfx_v8_0_cp_gfx_enable(adev, false);
3813
3814         /* PFP */
3815         fw_data = (const __le32 *)
3816                 (adev->gfx.pfp_fw->data +
3817                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3818         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3819         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3820         for (i = 0; i < fw_size; i++)
3821                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3822         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3823
3824         /* CE */
3825         fw_data = (const __le32 *)
3826                 (adev->gfx.ce_fw->data +
3827                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3828         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3829         WREG32(mmCP_CE_UCODE_ADDR, 0);
3830         for (i = 0; i < fw_size; i++)
3831                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3832         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3833
3834         /* ME */
3835         fw_data = (const __le32 *)
3836                 (adev->gfx.me_fw->data +
3837                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3838         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3839         WREG32(mmCP_ME_RAM_WADDR, 0);
3840         for (i = 0; i < fw_size; i++)
3841                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3842         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3843
3844         return 0;
3845 }
3846
3847 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3848 {
3849         u32 count = 0;
3850         const struct cs_section_def *sect = NULL;
3851         const struct cs_extent_def *ext = NULL;
3852
3853         /* begin clear state */
3854         count += 2;
3855         /* context control state */
3856         count += 3;
3857
3858         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3859                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3860                         if (sect->id == SECT_CONTEXT)
3861                                 count += 2 + ext->reg_count;
3862                         else
3863                                 return 0;
3864                 }
3865         }
3866         /* pa_sc_raster_config/pa_sc_raster_config1 */
3867         count += 4;
3868         /* end clear state */
3869         count += 2;
3870         /* clear state */
3871         count += 2;
3872
3873         return count;
3874 }
3875
3876 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3877 {
3878         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3879         const struct cs_section_def *sect = NULL;
3880         const struct cs_extent_def *ext = NULL;
3881         int r, i;
3882
3883         /* init the CP */
3884         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3885         WREG32(mmCP_ENDIAN_SWAP, 0);
3886         WREG32(mmCP_DEVICE_ID, 1);
3887
3888         gfx_v8_0_cp_gfx_enable(adev, true);
3889
3890         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3891         if (r) {
3892                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3893                 return r;
3894         }
3895
3896         /* clear state buffer */
3897         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3898         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3899
3900         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3901         amdgpu_ring_write(ring, 0x80000000);
3902         amdgpu_ring_write(ring, 0x80000000);
3903
3904         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3905                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3906                         if (sect->id == SECT_CONTEXT) {
3907                                 amdgpu_ring_write(ring,
3908                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3909                                                ext->reg_count));
3910                                 amdgpu_ring_write(ring,
3911                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3912                                 for (i = 0; i < ext->reg_count; i++)
3913                                         amdgpu_ring_write(ring, ext->extent[i]);
3914                         }
3915                 }
3916         }
3917
3918         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3919         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3920         switch (adev->asic_type) {
3921         case CHIP_TONGA:
3922         case CHIP_ELLESMERE:
3923                 amdgpu_ring_write(ring, 0x16000012);
3924                 amdgpu_ring_write(ring, 0x0000002A);
3925                 break;
3926         case CHIP_BAFFIN:
3927                 amdgpu_ring_write(ring, 0x16000012);
3928                 amdgpu_ring_write(ring, 0x00000000);
3929                 break;
3930         case CHIP_FIJI:
3931                 amdgpu_ring_write(ring, 0x3a00161a);
3932                 amdgpu_ring_write(ring, 0x0000002e);
3933                 break;
3934         case CHIP_TOPAZ:
3935         case CHIP_CARRIZO:
3936                 amdgpu_ring_write(ring, 0x00000002);
3937                 amdgpu_ring_write(ring, 0x00000000);
3938                 break;
3939         case CHIP_STONEY:
3940                 amdgpu_ring_write(ring, 0x00000000);
3941                 amdgpu_ring_write(ring, 0x00000000);
3942                 break;
3943         default:
3944                 BUG();
3945         }
3946
3947         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3948         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3949
3950         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3951         amdgpu_ring_write(ring, 0);
3952
3953         /* init the CE partitions */
3954         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3955         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3956         amdgpu_ring_write(ring, 0x8000);
3957         amdgpu_ring_write(ring, 0x8000);
3958
3959         amdgpu_ring_commit(ring);
3960
3961         return 0;
3962 }
3963
3964 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3965 {
3966         struct amdgpu_ring *ring;
3967         u32 tmp;
3968         u32 rb_bufsz;
3969         u64 rb_addr, rptr_addr;
3970         int r;
3971
3972         /* Set the write pointer delay */
3973         WREG32(mmCP_RB_WPTR_DELAY, 0);
3974
3975         /* set the RB to use vmid 0 */
3976         WREG32(mmCP_RB_VMID, 0);
3977
3978         /* Set ring buffer size */
3979         ring = &adev->gfx.gfx_ring[0];
3980         rb_bufsz = order_base_2(ring->ring_size / 8);
3981         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3982         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3983         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3984         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3985 #ifdef __BIG_ENDIAN
3986         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3987 #endif
3988         WREG32(mmCP_RB0_CNTL, tmp);
3989
3990         /* Initialize the ring buffer's read and write pointers */
3991         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3992         ring->wptr = 0;
3993         WREG32(mmCP_RB0_WPTR, ring->wptr);
3994
3995         /* set the wb address wether it's enabled or not */
3996         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3997         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3998         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3999
4000         mdelay(1);
4001         WREG32(mmCP_RB0_CNTL, tmp);
4002
4003         rb_addr = ring->gpu_addr >> 8;
4004         WREG32(mmCP_RB0_BASE, rb_addr);
4005         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4006
4007         /* no gfx doorbells on iceland */
4008         if (adev->asic_type != CHIP_TOPAZ) {
4009                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4010                 if (ring->use_doorbell) {
4011                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4012                                             DOORBELL_OFFSET, ring->doorbell_index);
4013                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4014                                             DOORBELL_HIT, 0);
4015                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4016                                             DOORBELL_EN, 1);
4017                 } else {
4018                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4019                                             DOORBELL_EN, 0);
4020                 }
4021                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4022
4023                 if (adev->asic_type == CHIP_TONGA) {
4024                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4025                                             DOORBELL_RANGE_LOWER,
4026                                             AMDGPU_DOORBELL_GFX_RING0);
4027                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4028
4029                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4030                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4031                 }
4032
4033         }
4034
4035         /* start the ring */
4036         gfx_v8_0_cp_gfx_start(adev);
4037         ring->ready = true;
4038         r = amdgpu_ring_test_ring(ring);
4039         if (r) {
4040                 ring->ready = false;
4041                 return r;
4042         }
4043
4044         return 0;
4045 }
4046
4047 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4048 {
4049         int i;
4050
4051         if (enable) {
4052                 WREG32(mmCP_MEC_CNTL, 0);
4053         } else {
4054                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4055                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4056                         adev->gfx.compute_ring[i].ready = false;
4057         }
4058         udelay(50);
4059 }
4060
4061 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4062 {
4063         const struct gfx_firmware_header_v1_0 *mec_hdr;
4064         const __le32 *fw_data;
4065         unsigned i, fw_size;
4066
4067         if (!adev->gfx.mec_fw)
4068                 return -EINVAL;
4069
4070         gfx_v8_0_cp_compute_enable(adev, false);
4071
4072         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4073         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4074
4075         fw_data = (const __le32 *)
4076                 (adev->gfx.mec_fw->data +
4077                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4078         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4079
4080         /* MEC1 */
4081         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4082         for (i = 0; i < fw_size; i++)
4083                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4084         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4085
4086         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4087         if (adev->gfx.mec2_fw) {
4088                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4089
4090                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4091                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4092
4093                 fw_data = (const __le32 *)
4094                         (adev->gfx.mec2_fw->data +
4095                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4096                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4097
4098                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4099                 for (i = 0; i < fw_size; i++)
4100                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4101                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4102         }
4103
4104         return 0;
4105 }
4106
/*
 * struct vi_mqd - in-memory image of a compute queue descriptor (MQD).
 *
 * The first 256 members are consecutive 32-bit words; the trailing
 * "ordinalN" comment on each member is its dword index.  They are followed
 * by reserved_t, a further 256-dword area set aside for the microcode.
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it, and stores in the
 * cp_hqd_xxx / cp_mqd_xxx members the same values it writes to the
 * corresponding CP_HQD_xxx / CP_MQD_xxx registers.
 *
 * NOTE(review): member order and count presumably have to match the layout
 * the CP microcode expects, so do not reorder, insert or remove members
 * without confirming against the hardware documentation.
 */
4107 struct vi_mqd {
4108         uint32_t header;  /* ordinal0 */
4109         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4110         uint32_t compute_dim_x;  /* ordinal2 */
4111         uint32_t compute_dim_y;  /* ordinal3 */
4112         uint32_t compute_dim_z;  /* ordinal4 */
4113         uint32_t compute_start_x;  /* ordinal5 */
4114         uint32_t compute_start_y;  /* ordinal6 */
4115         uint32_t compute_start_z;  /* ordinal7 */
4116         uint32_t compute_num_thread_x;  /* ordinal8 */
4117         uint32_t compute_num_thread_y;  /* ordinal9 */
4118         uint32_t compute_num_thread_z;  /* ordinal10 */
4119         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4120         uint32_t compute_perfcount_enable;  /* ordinal12 */
4121         uint32_t compute_pgm_lo;  /* ordinal13 */
4122         uint32_t compute_pgm_hi;  /* ordinal14 */
4123         uint32_t compute_tba_lo;  /* ordinal15 */
4124         uint32_t compute_tba_hi;  /* ordinal16 */
4125         uint32_t compute_tma_lo;  /* ordinal17 */
4126         uint32_t compute_tma_hi;  /* ordinal18 */
4127         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4128         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4129         uint32_t compute_vmid;  /* ordinal21 */
4130         uint32_t compute_resource_limits;  /* ordinal22 */
4131         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4132         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4133         uint32_t compute_tmpring_size;  /* ordinal25 */
4134         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4135         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4136         uint32_t compute_restart_x;  /* ordinal28 */
4137         uint32_t compute_restart_y;  /* ordinal29 */
4138         uint32_t compute_restart_z;  /* ordinal30 */
4139         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4140         uint32_t compute_misc_reserved;  /* ordinal32 */
4141         uint32_t compute_dispatch_id;  /* ordinal33 */
4142         uint32_t compute_threadgroup_id;  /* ordinal34 */
4143         uint32_t compute_relaunch;  /* ordinal35 */
4144         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4145         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4146         uint32_t compute_wave_restore_control;  /* ordinal38 */
4147         uint32_t reserved9;  /* ordinal39 */
4148         uint32_t reserved10;  /* ordinal40 */
4149         uint32_t reserved11;  /* ordinal41 */
4150         uint32_t reserved12;  /* ordinal42 */
4151         uint32_t reserved13;  /* ordinal43 */
4152         uint32_t reserved14;  /* ordinal44 */
4153         uint32_t reserved15;  /* ordinal45 */
4154         uint32_t reserved16;  /* ordinal46 */
4155         uint32_t reserved17;  /* ordinal47 */
4156         uint32_t reserved18;  /* ordinal48 */
4157         uint32_t reserved19;  /* ordinal49 */
4158         uint32_t reserved20;  /* ordinal50 */
4159         uint32_t reserved21;  /* ordinal51 */
4160         uint32_t reserved22;  /* ordinal52 */
4161         uint32_t reserved23;  /* ordinal53 */
4162         uint32_t reserved24;  /* ordinal54 */
4163         uint32_t reserved25;  /* ordinal55 */
4164         uint32_t reserved26;  /* ordinal56 */
4165         uint32_t reserved27;  /* ordinal57 */
4166         uint32_t reserved28;  /* ordinal58 */
4167         uint32_t reserved29;  /* ordinal59 */
4168         uint32_t reserved30;  /* ordinal60 */
4169         uint32_t reserved31;  /* ordinal61 */
4170         uint32_t reserved32;  /* ordinal62 */
4171         uint32_t reserved33;  /* ordinal63 */
4172         uint32_t reserved34;  /* ordinal64 */
4173         uint32_t compute_user_data_0;  /* ordinal65 */
4174         uint32_t compute_user_data_1;  /* ordinal66 */
4175         uint32_t compute_user_data_2;  /* ordinal67 */
4176         uint32_t compute_user_data_3;  /* ordinal68 */
4177         uint32_t compute_user_data_4;  /* ordinal69 */
4178         uint32_t compute_user_data_5;  /* ordinal70 */
4179         uint32_t compute_user_data_6;  /* ordinal71 */
4180         uint32_t compute_user_data_7;  /* ordinal72 */
4181         uint32_t compute_user_data_8;  /* ordinal73 */
4182         uint32_t compute_user_data_9;  /* ordinal74 */
4183         uint32_t compute_user_data_10;  /* ordinal75 */
4184         uint32_t compute_user_data_11;  /* ordinal76 */
4185         uint32_t compute_user_data_12;  /* ordinal77 */
4186         uint32_t compute_user_data_13;  /* ordinal78 */
4187         uint32_t compute_user_data_14;  /* ordinal79 */
4188         uint32_t compute_user_data_15;  /* ordinal80 */
4189         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4190         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4191         uint32_t reserved35;  /* ordinal83 */
4192         uint32_t reserved36;  /* ordinal84 */
4193         uint32_t reserved37;  /* ordinal85 */
4194         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4195         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4196         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4197         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4198         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4199         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4200         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4201         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4202         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4203         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4204         uint32_t reserved38;  /* ordinal96 */
4205         uint32_t reserved39;  /* ordinal97 */
4206         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4207         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4208         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4209         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4210         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4211         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4212         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4213         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4214         uint32_t reserved40;  /* ordinal106 */
4215         uint32_t reserved41;  /* ordinal107 */
4216         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4217         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4218         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4219         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4220         uint32_t reserved42;  /* ordinal112 */
4221         uint32_t reserved43;  /* ordinal113 */
4222         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4223         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4224         uint32_t cp_packet_id_lo;  /* ordinal116 */
4225         uint32_t cp_packet_id_hi;  /* ordinal117 */
4226         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4227         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4228         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4229         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4230         uint32_t gds_save_mask_lo;  /* ordinal122 */
4231         uint32_t gds_save_mask_hi;  /* ordinal123 */
4232         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4233         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4234         uint32_t reserved44;  /* ordinal126 */
4235         uint32_t reserved45;  /* ordinal127 */
4236         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4237         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4238         uint32_t cp_hqd_active;  /* ordinal130 */
4239         uint32_t cp_hqd_vmid;  /* ordinal131 */
4240         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4241         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4242         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4243         uint32_t cp_hqd_quantum;  /* ordinal135 */
4244         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4245         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4246         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4247         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4248         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4249         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4250         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4251         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4252         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4253         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4254         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4255         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4256         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4257         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4258         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4259         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4260         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4261         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4262         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4263         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4264         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4265         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4266         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4267         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4268         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4269         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4270         uint32_t cp_mqd_control;  /* ordinal162 */
4271         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4272         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4273         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4274         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4275         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4276         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4277         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4278         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4279         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4280         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4281         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4282         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4283         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4284         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4285         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4286         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4287         uint32_t cp_hqd_error;  /* ordinal179 */
4288         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4289         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4290         uint32_t reserved46;  /* ordinal182 */
4291         uint32_t reserved47;  /* ordinal183 */
4292         uint32_t reserved48;  /* ordinal184 */
4293         uint32_t reserved49;  /* ordinal185 */
4294         uint32_t reserved50;  /* ordinal186 */
4295         uint32_t reserved51;  /* ordinal187 */
4296         uint32_t reserved52;  /* ordinal188 */
4297         uint32_t reserved53;  /* ordinal189 */
4298         uint32_t reserved54;  /* ordinal190 */
4299         uint32_t reserved55;  /* ordinal191 */
4300         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4301         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4302         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4303         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4304         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4305         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4306         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4307         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4308         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4309         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4310         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4311         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4312         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4313         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4314         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4315         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4316         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4317         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4318         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4319         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4320         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4321         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4322         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4323         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4324         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4325         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4326         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4327         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4328         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4329         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4330         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4331         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4332         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4333         uint32_t reserved56;  /* ordinal225 */
4334         uint32_t reserved57;  /* ordinal226 */
4335         uint32_t reserved58;  /* ordinal227 */
4336         uint32_t set_resources_header;  /* ordinal228 */
4337         uint32_t set_resources_dw1;  /* ordinal229 */
4338         uint32_t set_resources_dw2;  /* ordinal230 */
4339         uint32_t set_resources_dw3;  /* ordinal231 */
4340         uint32_t set_resources_dw4;  /* ordinal232 */
4341         uint32_t set_resources_dw5;  /* ordinal233 */
4342         uint32_t set_resources_dw6;  /* ordinal234 */
4343         uint32_t set_resources_dw7;  /* ordinal235 */
4344         uint32_t reserved59;  /* ordinal236 */
4345         uint32_t reserved60;  /* ordinal237 */
4346         uint32_t reserved61;  /* ordinal238 */
4347         uint32_t reserved62;  /* ordinal239 */
4348         uint32_t reserved63;  /* ordinal240 */
4349         uint32_t reserved64;  /* ordinal241 */
4350         uint32_t reserved65;  /* ordinal242 */
4351         uint32_t reserved66;  /* ordinal243 */
4352         uint32_t reserved67;  /* ordinal244 */
4353         uint32_t reserved68;  /* ordinal245 */
4354         uint32_t reserved69;  /* ordinal246 */
4355         uint32_t reserved70;  /* ordinal247 */
4356         uint32_t reserved71;  /* ordinal248 */
4357         uint32_t reserved72;  /* ordinal249 */
4358         uint32_t reserved73;  /* ordinal250 */
4359         uint32_t reserved74;  /* ordinal251 */
4360         uint32_t reserved75;  /* ordinal252 */
4361         uint32_t reserved76;  /* ordinal253 */
4362         uint32_t reserved77;  /* ordinal254 */
4363         uint32_t reserved78;  /* ordinal255 */
4364
4365         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4366 };
4367
4368 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4369 {
4370         int i, r;
4371
4372         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4373                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4374
4375                 if (ring->mqd_obj) {
4376                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4377                         if (unlikely(r != 0))
4378                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4379
4380                         amdgpu_bo_unpin(ring->mqd_obj);
4381                         amdgpu_bo_unreserve(ring->mqd_obj);
4382
4383                         amdgpu_bo_unref(&ring->mqd_obj);
4384                         ring->mqd_obj = NULL;
4385                 }
4386         }
4387 }
4388
4389 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4390 {
4391         int r, i, j;
4392         u32 tmp;
4393         bool use_doorbell = true;
4394         u64 hqd_gpu_addr;
4395         u64 mqd_gpu_addr;
4396         u64 eop_gpu_addr;
4397         u64 wb_gpu_addr;
4398         u32 *buf;
4399         struct vi_mqd *mqd;
4400
4401         /* init the pipes */
4402         mutex_lock(&adev->srbm_mutex);
4403         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4404                 int me = (i < 4) ? 1 : 2;
4405                 int pipe = (i < 4) ? i : (i - 4);
4406
4407                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4408                 eop_gpu_addr >>= 8;
4409
4410                 vi_srbm_select(adev, me, pipe, 0, 0);
4411
4412                 /* write the EOP addr */
4413                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4414                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4415
4416                 /* set the VMID assigned */
4417                 WREG32(mmCP_HQD_VMID, 0);
4418
4419                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4420                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4421                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4422                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4423                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4424         }
4425         vi_srbm_select(adev, 0, 0, 0, 0);
4426         mutex_unlock(&adev->srbm_mutex);
4427
4428         /* init the queues.  Just two for now. */
4429         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4430                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4431
4432                 if (ring->mqd_obj == NULL) {
4433                         r = amdgpu_bo_create(adev,
4434                                              sizeof(struct vi_mqd),
4435                                              PAGE_SIZE, true,
4436                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4437                                              NULL, &ring->mqd_obj);
4438                         if (r) {
4439                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4440                                 return r;
4441                         }
4442                 }
4443
4444                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4445                 if (unlikely(r != 0)) {
4446                         gfx_v8_0_cp_compute_fini(adev);
4447                         return r;
4448                 }
4449                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4450                                   &mqd_gpu_addr);
4451                 if (r) {
4452                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4453                         gfx_v8_0_cp_compute_fini(adev);
4454                         return r;
4455                 }
4456                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4457                 if (r) {
4458                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4459                         gfx_v8_0_cp_compute_fini(adev);
4460                         return r;
4461                 }
4462
4463                 /* init the mqd struct */
4464                 memset(buf, 0, sizeof(struct vi_mqd));
4465
4466                 mqd = (struct vi_mqd *)buf;
4467                 mqd->header = 0xC0310800;
4468                 mqd->compute_pipelinestat_enable = 0x00000001;
4469                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4470                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4471                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4472                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4473                 mqd->compute_misc_reserved = 0x00000003;
4474
4475                 mutex_lock(&adev->srbm_mutex);
4476                 vi_srbm_select(adev, ring->me,
4477                                ring->pipe,
4478                                ring->queue, 0);
4479
4480                 /* disable wptr polling */
4481                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4482                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4483                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4484
4485                 mqd->cp_hqd_eop_base_addr_lo =
4486                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4487                 mqd->cp_hqd_eop_base_addr_hi =
4488                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4489
4490                 /* enable doorbell? */
4491                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4492                 if (use_doorbell) {
4493                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4494                 } else {
4495                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4496                 }
4497                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4498                 mqd->cp_hqd_pq_doorbell_control = tmp;
4499
4500                 /* disable the queue if it's active */
4501                 mqd->cp_hqd_dequeue_request = 0;
4502                 mqd->cp_hqd_pq_rptr = 0;
4503                 mqd->cp_hqd_pq_wptr= 0;
4504                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4505                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4506                         for (j = 0; j < adev->usec_timeout; j++) {
4507                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4508                                         break;
4509                                 udelay(1);
4510                         }
4511                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4512                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4513                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4514                 }
4515
4516                 /* set the pointer to the MQD */
4517                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4518                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4519                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4520                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4521
4522                 /* set MQD vmid to 0 */
4523                 tmp = RREG32(mmCP_MQD_CONTROL);
4524                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4525                 WREG32(mmCP_MQD_CONTROL, tmp);
4526                 mqd->cp_mqd_control = tmp;
4527
4528                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4529                 hqd_gpu_addr = ring->gpu_addr >> 8;
4530                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4531                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4532                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4533                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4534
4535                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4536                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4537                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4538                                     (order_base_2(ring->ring_size / 4) - 1));
4539                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4540                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4541 #ifdef __BIG_ENDIAN
4542                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4543 #endif
4544                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4545                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4546                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4547                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4548                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4549                 mqd->cp_hqd_pq_control = tmp;
4550
4551                 /* set the wb address wether it's enabled or not */
4552                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4553                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4554                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4555                         upper_32_bits(wb_gpu_addr) & 0xffff;
4556                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4557                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4558                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4559                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4560
4561                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4562                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4563                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4564                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4565                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4566                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4567                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4568
4569                 /* enable the doorbell if requested */
4570                 if (use_doorbell) {
4571                         if ((adev->asic_type == CHIP_CARRIZO) ||
4572                             (adev->asic_type == CHIP_FIJI) ||
4573                             (adev->asic_type == CHIP_STONEY) ||
4574                             (adev->asic_type == CHIP_BAFFIN) ||
4575                             (adev->asic_type == CHIP_ELLESMERE)) {
4576                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4577                                        AMDGPU_DOORBELL_KIQ << 2);
4578                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4579                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4580                         }
4581                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4582                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4583                                             DOORBELL_OFFSET, ring->doorbell_index);
4584                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4585                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4586                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4587                         mqd->cp_hqd_pq_doorbell_control = tmp;
4588
4589                 } else {
4590                         mqd->cp_hqd_pq_doorbell_control = 0;
4591                 }
4592                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4593                        mqd->cp_hqd_pq_doorbell_control);
4594
4595                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4596                 ring->wptr = 0;
4597                 mqd->cp_hqd_pq_wptr = ring->wptr;
4598                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4599                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4600
4601                 /* set the vmid for the queue */
4602                 mqd->cp_hqd_vmid = 0;
4603                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4604
4605                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4606                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4607                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4608                 mqd->cp_hqd_persistent_state = tmp;
4609                 if (adev->asic_type == CHIP_STONEY ||
4610                         adev->asic_type == CHIP_BAFFIN ||
4611                         adev->asic_type == CHIP_ELLESMERE) {
4612                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4613                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4614                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4615                 }
4616
4617                 /* activate the queue */
4618                 mqd->cp_hqd_active = 1;
4619                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4620
4621                 vi_srbm_select(adev, 0, 0, 0, 0);
4622                 mutex_unlock(&adev->srbm_mutex);
4623
4624                 amdgpu_bo_kunmap(ring->mqd_obj);
4625                 amdgpu_bo_unreserve(ring->mqd_obj);
4626         }
4627
4628         if (use_doorbell) {
4629                 tmp = RREG32(mmCP_PQ_STATUS);
4630                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4631                 WREG32(mmCP_PQ_STATUS, tmp);
4632         }
4633
4634         gfx_v8_0_cp_compute_enable(adev, true);
4635
4636         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4637                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4638
4639                 ring->ready = true;
4640                 r = amdgpu_ring_test_ring(ring);
4641                 if (r)
4642                         ring->ready = false;
4643         }
4644
4645         return 0;
4646 }
4647
4648 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4649 {
4650         int r;
4651
4652         if (!(adev->flags & AMD_IS_APU))
4653                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4654
4655         if (!adev->pp_enabled) {
4656                 if (!adev->firmware.smu_load) {
4657                         /* legacy firmware loading */
4658                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4659                         if (r)
4660                                 return r;
4661
4662                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4663                         if (r)
4664                                 return r;
4665                 } else {
4666                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4667                                                         AMDGPU_UCODE_ID_CP_CE);
4668                         if (r)
4669                                 return -EINVAL;
4670
4671                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4672                                                         AMDGPU_UCODE_ID_CP_PFP);
4673                         if (r)
4674                                 return -EINVAL;
4675
4676                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4677                                                         AMDGPU_UCODE_ID_CP_ME);
4678                         if (r)
4679                                 return -EINVAL;
4680
4681                         if (adev->asic_type == CHIP_TOPAZ) {
4682                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4683                                 if (r)
4684                                         return r;
4685                         } else {
4686                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4687                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4688                                 if (r)
4689                                         return -EINVAL;
4690                         }
4691                 }
4692         }
4693
4694         r = gfx_v8_0_cp_gfx_resume(adev);
4695         if (r)
4696                 return r;
4697
4698         r = gfx_v8_0_cp_compute_resume(adev);
4699         if (r)
4700                 return r;
4701
4702         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4703
4704         return 0;
4705 }
4706
/*
 * gfx_v8_0_cp_enable - enable or disable both halves of the command processor
 * @adev: amdgpu device
 * @enable: passed unchanged to the gfx and compute enable helpers
 *
 * Calls the gfx helper first, then the compute helper.
 */
4707 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4708 {
4709         gfx_v8_0_cp_gfx_enable(adev, enable);
4710         gfx_v8_0_cp_compute_enable(adev, enable);
4711 }
4712
/*
 * gfx_v8_0_hw_init - hardware bring-up of the GFX block
 * @handle: the amdgpu device, passed as an opaque pointer
 *
 * Applies the golden register settings, runs the GPU init, then resumes the
 * RLC followed by the CP.
 *
 * Returns 0 on success or the error from the RLC or CP resume step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = handle;
        int ret;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* the RLC is resumed before the CP */
        ret = gfx_v8_0_rlc_resume(adev);
        if (ret)
                return ret;

        return gfx_v8_0_cp_resume(adev);
}
4732
4733 static int gfx_v8_0_hw_fini(void *handle)
4734 {
4735         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4736
4737         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4738         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4739         gfx_v8_0_cp_enable(adev, false);
4740         gfx_v8_0_rlc_stop(adev);
4741         gfx_v8_0_cp_compute_fini(adev);
4742
4743         amdgpu_set_powergating_state(adev,
4744                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4745
4746         return 0;
4747 }
4748
/*
 * Suspend callback: identical to the hw_fini path.
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4755
/*
 * Resume callback: identical to the hw_init path.
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4762
4763 static bool gfx_v8_0_is_idle(void *handle)
4764 {
4765         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4766
4767         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4768                 return false;
4769         else
4770                 return true;
4771 }
4772
4773 static int gfx_v8_0_wait_for_idle(void *handle)
4774 {
4775         unsigned i;
4776         u32 tmp;
4777         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4778
4779         for (i = 0; i < adev->usec_timeout; i++) {
4780                 /* read MC_STATUS */
4781                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4782
4783                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4784                         return 0;
4785                 udelay(1);
4786         }
4787         return -ETIMEDOUT;
4788 }
4789
4790 static void gfx_v8_0_print_status(void *handle)
4791 {
4792         int i;
4793         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4794
4795         dev_info(adev->dev, "GFX 8.x registers\n");
4796         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
4797                  RREG32(mmGRBM_STATUS));
4798         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
4799                  RREG32(mmGRBM_STATUS2));
4800         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4801                  RREG32(mmGRBM_STATUS_SE0));
4802         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4803                  RREG32(mmGRBM_STATUS_SE1));
4804         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4805                  RREG32(mmGRBM_STATUS_SE2));
4806         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4807                  RREG32(mmGRBM_STATUS_SE3));
4808         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
4809         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4810                  RREG32(mmCP_STALLED_STAT1));
4811         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4812                  RREG32(mmCP_STALLED_STAT2));
4813         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4814                  RREG32(mmCP_STALLED_STAT3));
4815         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4816                  RREG32(mmCP_CPF_BUSY_STAT));
4817         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4818                  RREG32(mmCP_CPF_STALLED_STAT1));
4819         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
4820         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
4821         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4822                  RREG32(mmCP_CPC_STALLED_STAT1));
4823         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
4824
4825         for (i = 0; i < 32; i++) {
4826                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
4827                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
4828         }
4829         for (i = 0; i < 16; i++) {
4830                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
4831                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
4832         }
4833         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4834                 dev_info(adev->dev, "  se: %d\n", i);
4835                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
4836                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
4837                          RREG32(mmPA_SC_RASTER_CONFIG));
4838                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4839                          RREG32(mmPA_SC_RASTER_CONFIG_1));
4840         }
4841         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4842
4843         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4844                  RREG32(mmGB_ADDR_CONFIG));
4845         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4846                  RREG32(mmHDP_ADDR_CONFIG));
4847         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4848                  RREG32(mmDMIF_ADDR_CALC));
4849
4850         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4851                  RREG32(mmCP_MEQ_THRESHOLDS));
4852         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4853                  RREG32(mmSX_DEBUG_1));
4854         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4855                  RREG32(mmTA_CNTL_AUX));
4856         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4857                  RREG32(mmSPI_CONFIG_CNTL));
4858         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4859                  RREG32(mmSQ_CONFIG));
4860         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4861                  RREG32(mmDB_DEBUG));
4862         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4863                  RREG32(mmDB_DEBUG2));
4864         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4865                  RREG32(mmDB_DEBUG3));
4866         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4867                  RREG32(mmCB_HW_CONTROL));
4868         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4869                  RREG32(mmSPI_CONFIG_CNTL_1));
4870         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4871                  RREG32(mmPA_SC_FIFO_SIZE));
4872         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4873                  RREG32(mmVGT_NUM_INSTANCES));
4874         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4875                  RREG32(mmCP_PERFMON_CNTL));
4876         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4877                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4878         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4879                  RREG32(mmVGT_CACHE_INVALIDATION));
4880         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4881                  RREG32(mmVGT_GS_VERTEX_REUSE));
4882         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4883                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4884         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4885                  RREG32(mmPA_CL_ENHANCE));
4886         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4887                  RREG32(mmPA_SC_ENHANCE));
4888
4889         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4890                  RREG32(mmCP_ME_CNTL));
4891         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4892                  RREG32(mmCP_MAX_CONTEXT));
4893         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4894                  RREG32(mmCP_ENDIAN_SWAP));
4895         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4896                  RREG32(mmCP_DEVICE_ID));
4897
4898         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4899                  RREG32(mmCP_SEM_WAIT_TIMER));
4900
4901         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4902                  RREG32(mmCP_RB_WPTR_DELAY));
4903         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4904                  RREG32(mmCP_RB_VMID));
4905         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4906                  RREG32(mmCP_RB0_CNTL));
4907         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4908                  RREG32(mmCP_RB0_WPTR));
4909         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4910                  RREG32(mmCP_RB0_RPTR_ADDR));
4911         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4912                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4913         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4914                  RREG32(mmCP_RB0_CNTL));
4915         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4916                  RREG32(mmCP_RB0_BASE));
4917         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4918                  RREG32(mmCP_RB0_BASE_HI));
4919         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4920                  RREG32(mmCP_MEC_CNTL));
4921         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4922                  RREG32(mmCP_CPF_DEBUG));
4923
4924         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4925                  RREG32(mmSCRATCH_ADDR));
4926         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4927                  RREG32(mmSCRATCH_UMSK));
4928
4929         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4930                  RREG32(mmCP_INT_CNTL_RING0));
4931         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4932                  RREG32(mmRLC_LB_CNTL));
4933         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4934                  RREG32(mmRLC_CNTL));
4935         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4936                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4937         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4938                  RREG32(mmRLC_LB_CNTR_INIT));
4939         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4940                  RREG32(mmRLC_LB_CNTR_MAX));
4941         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4942                  RREG32(mmRLC_LB_INIT_CU_MASK));
4943         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4944                  RREG32(mmRLC_LB_PARAMS));
4945         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4946                  RREG32(mmRLC_LB_CNTL));
4947         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4948                  RREG32(mmRLC_MC_CNTL));
4949         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4950                  RREG32(mmRLC_UCODE_CNTL));
4951
4952         mutex_lock(&adev->srbm_mutex);
4953         for (i = 0; i < 16; i++) {
4954                 vi_srbm_select(adev, 0, 0, 0, i);
4955                 dev_info(adev->dev, "  VM %d:\n", i);
4956                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4957                          RREG32(mmSH_MEM_CONFIG));
4958                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4959                          RREG32(mmSH_MEM_APE1_BASE));
4960                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4961                          RREG32(mmSH_MEM_APE1_LIMIT));
4962                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4963                          RREG32(mmSH_MEM_BASES));
4964         }
4965         vi_srbm_select(adev, 0, 0, 0, 0);
4966         mutex_unlock(&adev->srbm_mutex);
4967 }
4968
4969 static int gfx_v8_0_soft_reset(void *handle)
4970 {
4971         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4972         u32 tmp;
4973         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4974
4975         /* GRBM_STATUS */
4976         tmp = RREG32(mmGRBM_STATUS);
4977         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4978                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4979                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4980                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4981                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4982                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4983                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4985                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4986                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4987         }
4988
4989         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4990                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4991                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4992                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4993                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4994         }
4995
4996         /* GRBM_STATUS2 */
4997         tmp = RREG32(mmGRBM_STATUS2);
4998         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4999                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5000                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5001
5002         /* SRBM_STATUS */
5003         tmp = RREG32(mmSRBM_STATUS);
5004         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5005                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5006                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5007
5008         if (grbm_soft_reset || srbm_soft_reset) {
5009                 gfx_v8_0_print_status((void *)adev);
5010                 /* stop the rlc */
5011                 gfx_v8_0_rlc_stop(adev);
5012
5013                 /* Disable GFX parsing/prefetching */
5014                 gfx_v8_0_cp_gfx_enable(adev, false);
5015
5016                 /* Disable MEC parsing/prefetching */
5017                 gfx_v8_0_cp_compute_enable(adev, false);
5018
5019                 if (grbm_soft_reset || srbm_soft_reset) {
5020                         tmp = RREG32(mmGMCON_DEBUG);
5021                         tmp = REG_SET_FIELD(tmp,
5022                                             GMCON_DEBUG, GFX_STALL, 1);
5023                         tmp = REG_SET_FIELD(tmp,
5024                                             GMCON_DEBUG, GFX_CLEAR, 1);
5025                         WREG32(mmGMCON_DEBUG, tmp);
5026
5027                         udelay(50);
5028                 }
5029
5030                 if (grbm_soft_reset) {
5031                         tmp = RREG32(mmGRBM_SOFT_RESET);
5032                         tmp |= grbm_soft_reset;
5033                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5034                         WREG32(mmGRBM_SOFT_RESET, tmp);
5035                         tmp = RREG32(mmGRBM_SOFT_RESET);
5036
5037                         udelay(50);
5038
5039                         tmp &= ~grbm_soft_reset;
5040                         WREG32(mmGRBM_SOFT_RESET, tmp);
5041                         tmp = RREG32(mmGRBM_SOFT_RESET);
5042                 }
5043
5044                 if (srbm_soft_reset) {
5045                         tmp = RREG32(mmSRBM_SOFT_RESET);
5046                         tmp |= srbm_soft_reset;
5047                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5048                         WREG32(mmSRBM_SOFT_RESET, tmp);
5049                         tmp = RREG32(mmSRBM_SOFT_RESET);
5050
5051                         udelay(50);
5052
5053                         tmp &= ~srbm_soft_reset;
5054                         WREG32(mmSRBM_SOFT_RESET, tmp);
5055                         tmp = RREG32(mmSRBM_SOFT_RESET);
5056                 }
5057
5058                 if (grbm_soft_reset || srbm_soft_reset) {
5059                         tmp = RREG32(mmGMCON_DEBUG);
5060                         tmp = REG_SET_FIELD(tmp,
5061                                             GMCON_DEBUG, GFX_STALL, 0);
5062                         tmp = REG_SET_FIELD(tmp,
5063                                             GMCON_DEBUG, GFX_CLEAR, 0);
5064                         WREG32(mmGMCON_DEBUG, tmp);
5065                 }
5066
5067                 /* Wait a little for things to settle down */
5068                 udelay(50);
5069                 gfx_v8_0_print_status((void *)adev);
5070         }
5071         return 0;
5072 }
5073
5074 /**
5075  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5076  *
5077  * @adev: amdgpu_device pointer
5078  *
5079  * Fetches a GPU clock counter snapshot.
5080  * Returns the 64 bit clock counter snapshot.
5081  */
5082 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5083 {
5084         uint64_t clock;
5085
5086         mutex_lock(&adev->gfx.gpu_clock_mutex);
5087         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5088         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5089                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5090         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5091         return clock;
5092 }
5093
5094 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5095                                           uint32_t vmid,
5096                                           uint32_t gds_base, uint32_t gds_size,
5097                                           uint32_t gws_base, uint32_t gws_size,
5098                                           uint32_t oa_base, uint32_t oa_size)
5099 {
5100         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5101         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5102
5103         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5104         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5105
5106         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5107         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5108
5109         /* GDS Base */
5110         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5111         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5112                                 WRITE_DATA_DST_SEL(0)));
5113         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5114         amdgpu_ring_write(ring, 0);
5115         amdgpu_ring_write(ring, gds_base);
5116
5117         /* GDS Size */
5118         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5119         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5120                                 WRITE_DATA_DST_SEL(0)));
5121         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5122         amdgpu_ring_write(ring, 0);
5123         amdgpu_ring_write(ring, gds_size);
5124
5125         /* GWS */
5126         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5127         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5128                                 WRITE_DATA_DST_SEL(0)));
5129         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5130         amdgpu_ring_write(ring, 0);
5131         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5132
5133         /* OA */
5134         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5135         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5136                                 WRITE_DATA_DST_SEL(0)));
5137         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5138         amdgpu_ring_write(ring, 0);
5139         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5140 }
5141
5142 static int gfx_v8_0_early_init(void *handle)
5143 {
5144         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5145
5146         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5147         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5148         gfx_v8_0_set_ring_funcs(adev);
5149         gfx_v8_0_set_irq_funcs(adev);
5150         gfx_v8_0_set_gds_init(adev);
5151         gfx_v8_0_set_rlc_funcs(adev);
5152
5153         return 0;
5154 }
5155
5156 static int gfx_v8_0_late_init(void *handle)
5157 {
5158         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5159         int r;
5160
5161         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5162         if (r)
5163                 return r;
5164
5165         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5166         if (r)
5167                 return r;
5168
5169         /* requires IBs so do in late init after IB pool is initialized */
5170         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5171         if (r)
5172                 return r;
5173
5174         amdgpu_set_powergating_state(adev,
5175                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5176
5177         return 0;
5178 }
5179
5180 static void baffin_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5181                 bool enable)
5182 {
5183         uint32_t data, temp;
5184
5185         /* Send msg to SMU via Powerplay */
5186         amdgpu_set_powergating_state(adev,
5187                         AMD_IP_BLOCK_TYPE_SMC,
5188                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5189
5190         if (enable) {
5191                 /* Enable static MGPG */
5192                 temp = data = RREG32(mmRLC_PG_CNTL);
5193                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5194
5195                 if (temp != data)
5196                         WREG32(mmRLC_PG_CNTL, data);
5197         } else {
5198                 temp = data = RREG32(mmRLC_PG_CNTL);
5199                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5200
5201                 if (temp != data)
5202                         WREG32(mmRLC_PG_CNTL, data);
5203         }
5204 }
5205
5206 static void baffin_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5207                 bool enable)
5208 {
5209         uint32_t data, temp;
5210
5211         if (enable) {
5212                 /* Enable dynamic MGPG */
5213                 temp = data = RREG32(mmRLC_PG_CNTL);
5214                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5215
5216                 if (temp != data)
5217                         WREG32(mmRLC_PG_CNTL, data);
5218         } else {
5219                 temp = data = RREG32(mmRLC_PG_CNTL);
5220                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5221
5222                 if (temp != data)
5223                         WREG32(mmRLC_PG_CNTL, data);
5224         }
5225 }
5226
5227 static void baffin_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5228                 bool enable)
5229 {
5230         uint32_t data, temp;
5231
5232         if (enable) {
5233                 /* Enable quick PG */
5234                 temp = data = RREG32(mmRLC_PG_CNTL);
5235                 data |= 0x100000;
5236
5237                 if (temp != data)
5238                         WREG32(mmRLC_PG_CNTL, data);
5239         } else {
5240                 temp = data = RREG32(mmRLC_PG_CNTL);
5241                 data &= ~0x100000;
5242
5243                 if (temp != data)
5244                         WREG32(mmRLC_PG_CNTL, data);
5245         }
5246 }
5247
5248 static int gfx_v8_0_set_powergating_state(void *handle,
5249                                           enum amd_powergating_state state)
5250 {
5251         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5252
5253         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5254                 return 0;
5255
5256         switch (adev->asic_type) {
5257         case CHIP_BAFFIN:
5258                 if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG)
5259                         baffin_enable_gfx_static_mg_power_gating(adev,
5260                                         state == AMD_PG_STATE_GATE ? true : false);
5261                 else if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG)
5262                         baffin_enable_gfx_dynamic_mg_power_gating(adev,
5263                                         state == AMD_PG_STATE_GATE ? true : false);
5264                 else
5265                         baffin_enable_gfx_quick_mg_power_gating(adev,
5266                                         state == AMD_PG_STATE_GATE ? true : false);
5267                 break;
5268         default:
5269                 break;
5270         }
5271
5272         return 0;
5273 }
5274
5275 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5276                                      uint32_t reg_addr, uint32_t cmd)
5277 {
5278         uint32_t data;
5279
5280         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5281
5282         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5283         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5284
5285         data = RREG32(mmRLC_SERDES_WR_CTRL);
5286         if (adev->asic_type == CHIP_STONEY)
5287                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5288                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5289                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5290                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5291                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5292                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5293                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5294                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5295                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5296         else
5297                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5298                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5299                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5300                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5301                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5302                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5303                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5304                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5305                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5306                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5307                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5308         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5309                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5310                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5311                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5312
5313         WREG32(mmRLC_SERDES_WR_CTRL, data);
5314 }
5315
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2 */
5316 #define MSG_ENTER_RLC_SAFE_MODE     1
5317 #define MSG_EXIT_RLC_SAFE_MODE      0
5318
/*
 * RLC_GPR_REG2 layout as used by the cz safe mode helpers: REQ is
 * bit 0 (set by the driver, then polled until it reads back as zero),
 * MESSAGE occupies bits 1-4.
 */
5319 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5320 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5321 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5322
5323 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5324 {
5325         u32 data = 0;
5326         unsigned i;
5327
5328         data = RREG32(mmRLC_CNTL);
5329         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5330                 return;
5331
5332         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5333             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5334                                AMD_PG_SUPPORT_GFX_DMG))) {
5335                 data |= RLC_GPR_REG2__REQ_MASK;
5336                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5337                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5338                 WREG32(mmRLC_GPR_REG2, data);
5339
5340                 for (i = 0; i < adev->usec_timeout; i++) {
5341                         if ((RREG32(mmRLC_GPM_STAT) &
5342                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5343                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5344                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5345                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5346                                 break;
5347                         udelay(1);
5348                 }
5349
5350                 for (i = 0; i < adev->usec_timeout; i++) {
5351                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5352                                 break;
5353                         udelay(1);
5354                 }
5355                 adev->gfx.rlc.in_safe_mode = true;
5356         }
5357 }
5358
5359 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5360 {
5361         u32 data;
5362         unsigned i;
5363
5364         data = RREG32(mmRLC_CNTL);
5365         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5366                 return;
5367
5368         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5369             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5370                                AMD_PG_SUPPORT_GFX_DMG))) {
5371                 data |= RLC_GPR_REG2__REQ_MASK;
5372                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5373                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5374                 WREG32(mmRLC_GPR_REG2, data);
5375                 adev->gfx.rlc.in_safe_mode = false;
5376         }
5377
5378         for (i = 0; i < adev->usec_timeout; i++) {
5379                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5380                         break;
5381                 udelay(1);
5382         }
5383 }
5384
5385 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5386 {
5387         u32 data;
5388         unsigned i;
5389
5390         data = RREG32(mmRLC_CNTL);
5391         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5392                 return;
5393
5394         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5395                 data |= RLC_SAFE_MODE__CMD_MASK;
5396                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5397                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5398                 WREG32(mmRLC_SAFE_MODE, data);
5399
5400                 for (i = 0; i < adev->usec_timeout; i++) {
5401                         if ((RREG32(mmRLC_GPM_STAT) &
5402                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5403                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5404                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5405                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5406                                 break;
5407                         udelay(1);
5408                 }
5409
5410                 for (i = 0; i < adev->usec_timeout; i++) {
5411                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5412                                 break;
5413                         udelay(1);
5414                 }
5415                 adev->gfx.rlc.in_safe_mode = true;
5416         }
5417 }
5418
5419 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5420 {
5421         u32 data = 0;
5422         unsigned i;
5423
5424         data = RREG32(mmRLC_CNTL);
5425         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5426                 return;
5427
5428         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5429                 if (adev->gfx.rlc.in_safe_mode) {
5430                         data |= RLC_SAFE_MODE__CMD_MASK;
5431                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5432                         WREG32(mmRLC_SAFE_MODE, data);
5433                         adev->gfx.rlc.in_safe_mode = false;
5434                 }
5435         }
5436
5437         for (i = 0; i < adev->usec_timeout; i++) {
5438                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5439                         break;
5440                 udelay(1);
5441         }
5442 }
5443
/*
 * No-op safe mode entry: touches no registers and only records that
 * safe mode is entered.
 */
5444 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5445 {
5446         adev->gfx.rlc.in_safe_mode = true;
5447 }
5448
5449 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5450 {
5451         adev->gfx.rlc.in_safe_mode = false;
5452 }
5453
5454 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5455         .enter_safe_mode = cz_enter_rlc_safe_mode,
5456         .exit_safe_mode = cz_exit_rlc_safe_mode
5457 };
5458
5459 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5460         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5461         .exit_safe_mode = iceland_exit_rlc_safe_mode
5462 };
5463
5464 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5465         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5466         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5467 };
5468
5469 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5470                                                       bool enable)
5471 {
5472         uint32_t temp, data;
5473
5474         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5475
5476         /* It is disabled by HW by default */
5477         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5478                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5479                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5480                                 /* 1 - RLC memory Light sleep */
5481                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5482                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5483                                 if (temp != data)
5484                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5485                         }
5486
5487                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5488                                 /* 2 - CP memory Light sleep */
5489                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5490                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5491                                 if (temp != data)
5492                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5493                         }
5494                 }
5495
5496                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5497                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5498                 if (adev->flags & AMD_IS_APU)
5499                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5500                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5501                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5502                 else
5503                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5504                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5505                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5506                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5507
5508                 if (temp != data)
5509                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5510
5511                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5512                 gfx_v8_0_wait_for_rlc_serdes(adev);
5513
5514                 /* 5 - clear mgcg override */
5515                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5516
5517                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5518                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5519                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5520                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5521                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5522                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5523                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5524                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5525                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5526                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5527                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5528                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5529                         if (temp != data)
5530                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5531                 }
5532                 udelay(50);
5533
5534                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5535                 gfx_v8_0_wait_for_rlc_serdes(adev);
5536         } else {
5537                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5538                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5539                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5540                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5541                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5542                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5543                 if (temp != data)
5544                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5545
5546                 /* 2 - disable MGLS in RLC */
5547                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5548                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5549                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5550                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5551                 }
5552
5553                 /* 3 - disable MGLS in CP */
5554                 data = RREG32(mmCP_MEM_SLP_CNTL);
5555                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5556                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5557                         WREG32(mmCP_MEM_SLP_CNTL, data);
5558                 }
5559
5560                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5561                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5562                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5563                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5564                 if (temp != data)
5565                         WREG32(mmCGTS_SM_CTRL_REG, data);
5566
5567                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5568                 gfx_v8_0_wait_for_rlc_serdes(adev);
5569
5570                 /* 6 - set mgcg override */
5571                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5572
5573                 udelay(50);
5574
5575                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5576                 gfx_v8_0_wait_for_rlc_serdes(adev);
5577         }
5578
5579         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5580 }
5581
5582 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5583                                                       bool enable)
5584 {
5585         uint32_t temp, temp1, data, data1;
5586
5587         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5588
5589         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5590
5591         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5592                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5593                  * Cmp_busy/GFX_Idle interrupts
5594                  */
5595                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5596
5597                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5598                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5599                 if (temp1 != data1)
5600                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5601
5602                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5603                 gfx_v8_0_wait_for_rlc_serdes(adev);
5604
5605                 /* 3 - clear cgcg override */
5606                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5607
5608                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5609                 gfx_v8_0_wait_for_rlc_serdes(adev);
5610
5611                 /* 4 - write cmd to set CGLS */
5612                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5613
5614                 /* 5 - enable cgcg */
5615                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5616
5617                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5618                         /* enable cgls*/
5619                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5620
5621                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5622                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5623
5624                         if (temp1 != data1)
5625                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5626                 } else {
5627                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5628                 }
5629
5630                 if (temp != data)
5631                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5632         } else {
5633                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5634                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5635
5636                 /* TEST CGCG */
5637                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5638                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5639                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5640                 if (temp1 != data1)
5641                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5642
5643                 /* read gfx register to wake up cgcg */
5644                 RREG32(mmCB_CGTT_SCLK_CTRL);
5645                 RREG32(mmCB_CGTT_SCLK_CTRL);
5646                 RREG32(mmCB_CGTT_SCLK_CTRL);
5647                 RREG32(mmCB_CGTT_SCLK_CTRL);
5648
5649                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5650                 gfx_v8_0_wait_for_rlc_serdes(adev);
5651
5652                 /* write cmd to Set CGCG Overrride */
5653                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5654
5655                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5656                 gfx_v8_0_wait_for_rlc_serdes(adev);
5657
5658                 /* write cmd to Clear CGLS */
5659                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5660
5661                 /* disable cgcg, cgls should be disabled too. */
5662                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5663                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5664                 if (temp != data)
5665                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5666         }
5667
5668         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5669 }
5670 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5671                                             bool enable)
5672 {
5673         if (enable) {
5674                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5675                  * ===  MGCG + MGLS + TS(CG/LS) ===
5676                  */
5677                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5678                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5679         } else {
5680                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5681                  * ===  CGCG + CGLS ===
5682                  */
5683                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5684                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5685         }
5686         return 0;
5687 }
5688
5689 static int gfx_v8_0_set_clockgating_state(void *handle,
5690                                           enum amd_clockgating_state state)
5691 {
5692         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5693
5694         switch (adev->asic_type) {
5695         case CHIP_FIJI:
5696         case CHIP_CARRIZO:
5697         case CHIP_STONEY:
5698                 gfx_v8_0_update_gfx_clock_gating(adev,
5699                                                  state == AMD_CG_STATE_GATE ? true : false);
5700                 break;
5701         default:
5702                 break;
5703         }
5704         return 0;
5705 }
5706
5707 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5708 {
5709         u32 rptr;
5710
5711         rptr = ring->adev->wb.wb[ring->rptr_offs];
5712
5713         return rptr;
5714 }
5715
5716 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5717 {
5718         struct amdgpu_device *adev = ring->adev;
5719         u32 wptr;
5720
5721         if (ring->use_doorbell)
5722                 /* XXX check if swapping is necessary on BE */
5723                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5724         else
5725                 wptr = RREG32(mmCP_RB0_WPTR);
5726
5727         return wptr;
5728 }
5729
5730 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5731 {
5732         struct amdgpu_device *adev = ring->adev;
5733
5734         if (ring->use_doorbell) {
5735                 /* XXX check if swapping is necessary on BE */
5736                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5737                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5738         } else {
5739                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5740                 (void)RREG32(mmCP_RB0_WPTR);
5741         }
5742 }
5743
5744 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5745 {
5746         u32 ref_and_mask, reg_mem_engine;
5747
5748         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5749                 switch (ring->me) {
5750                 case 1:
5751                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5752                         break;
5753                 case 2:
5754                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5755                         break;
5756                 default:
5757                         return;
5758                 }
5759                 reg_mem_engine = 0;
5760         } else {
5761                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5762                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5763         }
5764
5765         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5766         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5767                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5768                                  reg_mem_engine));
5769         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5770         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5771         amdgpu_ring_write(ring, ref_and_mask);
5772         amdgpu_ring_write(ring, ref_and_mask);
5773         amdgpu_ring_write(ring, 0x20); /* poll interval */
5774 }
5775
5776 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5777 {
5778         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5779         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5780                                  WRITE_DATA_DST_SEL(0) |
5781                                  WR_CONFIRM));
5782         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5783         amdgpu_ring_write(ring, 0);
5784         amdgpu_ring_write(ring, 1);
5785
5786 }
5787
5788 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5789                                   struct amdgpu_ib *ib)
5790 {
5791         bool need_ctx_switch = ring->current_ctx != ib->ctx;
5792         u32 header, control = 0;
5793         u32 next_rptr = ring->wptr + 5;
5794
5795         /* drop the CE preamble IB for the same context */
5796         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
5797                 return;
5798
5799         if (need_ctx_switch)
5800                 next_rptr += 2;
5801
5802         next_rptr += 4;
5803         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5804         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5805         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5806         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5807         amdgpu_ring_write(ring, next_rptr);
5808
5809         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5810         if (need_ctx_switch) {
5811                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5812                 amdgpu_ring_write(ring, 0);
5813         }
5814
5815         if (ib->flags & AMDGPU_IB_FLAG_CE)
5816                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5817         else
5818                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5819
5820         control |= ib->length_dw | (ib->vm_id << 24);
5821
5822         amdgpu_ring_write(ring, header);
5823         amdgpu_ring_write(ring,
5824 #ifdef __BIG_ENDIAN
5825                           (2 << 0) |
5826 #endif
5827                           (ib->gpu_addr & 0xFFFFFFFC));
5828         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5829         amdgpu_ring_write(ring, control);
5830 }
5831
5832 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5833                                   struct amdgpu_ib *ib)
5834 {
5835         u32 header, control = 0;
5836         u32 next_rptr = ring->wptr + 5;
5837
5838         control |= INDIRECT_BUFFER_VALID;
5839
5840         next_rptr += 4;
5841         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5842         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5843         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5844         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5845         amdgpu_ring_write(ring, next_rptr);
5846
5847         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5848
5849         control |= ib->length_dw | (ib->vm_id << 24);
5850
5851         amdgpu_ring_write(ring, header);
5852         amdgpu_ring_write(ring,
5853 #ifdef __BIG_ENDIAN
5854                                           (2 << 0) |
5855 #endif
5856                                           (ib->gpu_addr & 0xFFFFFFFC));
5857         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5858         amdgpu_ring_write(ring, control);
5859 }
5860
5861 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5862                                          u64 seq, unsigned flags)
5863 {
5864         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5865         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5866
5867         /* EVENT_WRITE_EOP - flush caches, send int */
5868         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5869         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5870                                  EOP_TC_ACTION_EN |
5871                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5872                                  EVENT_INDEX(5)));
5873         amdgpu_ring_write(ring, addr & 0xfffffffc);
5874         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5875                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5876         amdgpu_ring_write(ring, lower_32_bits(seq));
5877         amdgpu_ring_write(ring, upper_32_bits(seq));
5878
5879 }
5880
5881 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5882 {
5883         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5884         uint32_t seq = ring->fence_drv.sync_seq;
5885         uint64_t addr = ring->fence_drv.gpu_addr;
5886
5887         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5888         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5889                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5890                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5891         amdgpu_ring_write(ring, addr & 0xfffffffc);
5892         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5893         amdgpu_ring_write(ring, seq);
5894         amdgpu_ring_write(ring, 0xffffffff);
5895         amdgpu_ring_write(ring, 4); /* poll interval */
5896
5897         if (usepfp) {
5898                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5899                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5900                 amdgpu_ring_write(ring, 0);
5901                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5902                 amdgpu_ring_write(ring, 0);
5903         }
5904 }
5905
5906 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5907                                         unsigned vm_id, uint64_t pd_addr)
5908 {
5909         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5910
5911         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5912         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5913                                  WRITE_DATA_DST_SEL(0)) |
5914                                  WR_CONFIRM);
5915         if (vm_id < 8) {
5916                 amdgpu_ring_write(ring,
5917                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5918         } else {
5919                 amdgpu_ring_write(ring,
5920                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5921         }
5922         amdgpu_ring_write(ring, 0);
5923         amdgpu_ring_write(ring, pd_addr >> 12);
5924
5925         /* bits 0-15 are the VM contexts0-15 */
5926         /* invalidate the cache */
5927         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5928         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5929                                  WRITE_DATA_DST_SEL(0)));
5930         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5931         amdgpu_ring_write(ring, 0);
5932         amdgpu_ring_write(ring, 1 << vm_id);
5933
5934         /* wait for the invalidate to complete */
5935         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5936         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5937                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5938                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5939         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5940         amdgpu_ring_write(ring, 0);
5941         amdgpu_ring_write(ring, 0); /* ref */
5942         amdgpu_ring_write(ring, 0); /* mask */
5943         amdgpu_ring_write(ring, 0x20); /* poll interval */
5944
5945         /* compute doesn't have PFP */
5946         if (usepfp) {
5947                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5948                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5949                 amdgpu_ring_write(ring, 0x0);
5950                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5951                 amdgpu_ring_write(ring, 0);
5952                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5953                 amdgpu_ring_write(ring, 0);
5954         }
5955 }
5956
5957 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5958 {
5959         return ring->adev->wb.wb[ring->rptr_offs];
5960 }
5961
5962 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5963 {
5964         return ring->adev->wb.wb[ring->wptr_offs];
5965 }
5966
5967 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5968 {
5969         struct amdgpu_device *adev = ring->adev;
5970
5971         /* XXX check if swapping is necessary on BE */
5972         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5973         WDOORBELL32(ring->doorbell_index, ring->wptr);
5974 }
5975
5976 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5977                                              u64 addr, u64 seq,
5978                                              unsigned flags)
5979 {
5980         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5981         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5982
5983         /* RELEASE_MEM - flush caches, send int */
5984         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5985         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5986                                  EOP_TC_ACTION_EN |
5987                                  EOP_TC_WB_ACTION_EN |
5988                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5989                                  EVENT_INDEX(5)));
5990         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5991         amdgpu_ring_write(ring, addr & 0xfffffffc);
5992         amdgpu_ring_write(ring, upper_32_bits(addr));
5993         amdgpu_ring_write(ring, lower_32_bits(seq));
5994         amdgpu_ring_write(ring, upper_32_bits(seq));
5995 }
5996
5997 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5998                                                  enum amdgpu_interrupt_state state)
5999 {
6000         u32 cp_int_cntl;
6001
6002         switch (state) {
6003         case AMDGPU_IRQ_STATE_DISABLE:
6004                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6005                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6006                                             TIME_STAMP_INT_ENABLE, 0);
6007                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6008                 break;
6009         case AMDGPU_IRQ_STATE_ENABLE:
6010                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6011                 cp_int_cntl =
6012                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6013                                       TIME_STAMP_INT_ENABLE, 1);
6014                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6015                 break;
6016         default:
6017                 break;
6018         }
6019 }
6020
6021 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6022                                                      int me, int pipe,
6023                                                      enum amdgpu_interrupt_state state)
6024 {
6025         u32 mec_int_cntl, mec_int_cntl_reg;
6026
6027         /*
6028          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6029          * handles the setting of interrupts for this specific pipe. All other
6030          * pipes' interrupts are set by amdkfd.
6031          */
6032
6033         if (me == 1) {
6034                 switch (pipe) {
6035                 case 0:
6036                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6037                         break;
6038                 default:
6039                         DRM_DEBUG("invalid pipe %d\n", pipe);
6040                         return;
6041                 }
6042         } else {
6043                 DRM_DEBUG("invalid me %d\n", me);
6044                 return;
6045         }
6046
6047         switch (state) {
6048         case AMDGPU_IRQ_STATE_DISABLE:
6049                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6050                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6051                                              TIME_STAMP_INT_ENABLE, 0);
6052                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6053                 break;
6054         case AMDGPU_IRQ_STATE_ENABLE:
6055                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6056                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6057                                              TIME_STAMP_INT_ENABLE, 1);
6058                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6059                 break;
6060         default:
6061                 break;
6062         }
6063 }
6064
6065 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6066                                              struct amdgpu_irq_src *source,
6067                                              unsigned type,
6068                                              enum amdgpu_interrupt_state state)
6069 {
6070         u32 cp_int_cntl;
6071
6072         switch (state) {
6073         case AMDGPU_IRQ_STATE_DISABLE:
6074                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6075                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6076                                             PRIV_REG_INT_ENABLE, 0);
6077                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6078                 break;
6079         case AMDGPU_IRQ_STATE_ENABLE:
6080                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6081                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6082                                             PRIV_REG_INT_ENABLE, 1);
6083                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6084                 break;
6085         default:
6086                 break;
6087         }
6088
6089         return 0;
6090 }
6091
6092 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6093                                               struct amdgpu_irq_src *source,
6094                                               unsigned type,
6095                                               enum amdgpu_interrupt_state state)
6096 {
6097         u32 cp_int_cntl;
6098
6099         switch (state) {
6100         case AMDGPU_IRQ_STATE_DISABLE:
6101                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6102                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6103                                             PRIV_INSTR_INT_ENABLE, 0);
6104                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6105                 break;
6106         case AMDGPU_IRQ_STATE_ENABLE:
6107                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6108                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6109                                             PRIV_INSTR_INT_ENABLE, 1);
6110                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6111                 break;
6112         default:
6113                 break;
6114         }
6115
6116         return 0;
6117 }
6118
6119 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6120                                             struct amdgpu_irq_src *src,
6121                                             unsigned type,
6122                                             enum amdgpu_interrupt_state state)
6123 {
6124         switch (type) {
6125         case AMDGPU_CP_IRQ_GFX_EOP:
6126                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6127                 break;
6128         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6129                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6130                 break;
6131         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6132                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6133                 break;
6134         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6135                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6136                 break;
6137         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6138                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6139                 break;
6140         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6141                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6142                 break;
6143         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6144                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6145                 break;
6146         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6147                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6148                 break;
6149         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6150                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6151                 break;
6152         default:
6153                 break;
6154         }
6155         return 0;
6156 }
6157
6158 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6159                             struct amdgpu_irq_src *source,
6160                             struct amdgpu_iv_entry *entry)
6161 {
6162         int i;
6163         u8 me_id, pipe_id, queue_id;
6164         struct amdgpu_ring *ring;
6165
6166         DRM_DEBUG("IH: CP EOP\n");
6167         me_id = (entry->ring_id & 0x0c) >> 2;
6168         pipe_id = (entry->ring_id & 0x03) >> 0;
6169         queue_id = (entry->ring_id & 0x70) >> 4;
6170
6171         switch (me_id) {
6172         case 0:
6173                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6174                 break;
6175         case 1:
6176         case 2:
6177                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6178                         ring = &adev->gfx.compute_ring[i];
6179                         /* Per-queue interrupt is supported for MEC starting from VI.
6180                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6181                           */
6182                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6183                                 amdgpu_fence_process(ring);
6184                 }
6185                 break;
6186         }
6187         return 0;
6188 }
6189
6190 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6191                                  struct amdgpu_irq_src *source,
6192                                  struct amdgpu_iv_entry *entry)
6193 {
6194         DRM_ERROR("Illegal register access in command stream\n");
6195         schedule_work(&adev->reset_work);
6196         return 0;
6197 }
6198
6199 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6200                                   struct amdgpu_irq_src *source,
6201                                   struct amdgpu_iv_entry *entry)
6202 {
6203         DRM_ERROR("Illegal instruction in command stream\n");
6204         schedule_work(&adev->reset_work);
6205         return 0;
6206 }
6207
6208 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6209         .early_init = gfx_v8_0_early_init,
6210         .late_init = gfx_v8_0_late_init,
6211         .sw_init = gfx_v8_0_sw_init,
6212         .sw_fini = gfx_v8_0_sw_fini,
6213         .hw_init = gfx_v8_0_hw_init,
6214         .hw_fini = gfx_v8_0_hw_fini,
6215         .suspend = gfx_v8_0_suspend,
6216         .resume = gfx_v8_0_resume,
6217         .is_idle = gfx_v8_0_is_idle,
6218         .wait_for_idle = gfx_v8_0_wait_for_idle,
6219         .soft_reset = gfx_v8_0_soft_reset,
6220         .print_status = gfx_v8_0_print_status,
6221         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6222         .set_powergating_state = gfx_v8_0_set_powergating_state,
6223 };
6224
6225 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6226         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6227         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6228         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6229         .parse_cs = NULL,
6230         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6231         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6232         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6233         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6234         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6235         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6236         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6237         .test_ring = gfx_v8_0_ring_test_ring,
6238         .test_ib = gfx_v8_0_ring_test_ib,
6239         .insert_nop = amdgpu_ring_insert_nop,
6240         .pad_ib = amdgpu_ring_generic_pad_ib,
6241 };
6242
6243 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6244         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6245         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6246         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6247         .parse_cs = NULL,
6248         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6249         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6250         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6251         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6252         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6253         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6254         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6255         .test_ring = gfx_v8_0_ring_test_ring,
6256         .test_ib = gfx_v8_0_ring_test_ib,
6257         .insert_nop = amdgpu_ring_insert_nop,
6258         .pad_ib = amdgpu_ring_generic_pad_ib,
6259 };
6260
6261 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6262 {
6263         int i;
6264
6265         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6266                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6267
6268         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6269                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6270 }
6271
6272 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6273         .set = gfx_v8_0_set_eop_interrupt_state,
6274         .process = gfx_v8_0_eop_irq,
6275 };
6276
6277 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6278         .set = gfx_v8_0_set_priv_reg_fault_state,
6279         .process = gfx_v8_0_priv_reg_irq,
6280 };
6281
6282 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6283         .set = gfx_v8_0_set_priv_inst_fault_state,
6284         .process = gfx_v8_0_priv_inst_irq,
6285 };
6286
6287 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6288 {
6289         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6290         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6291
6292         adev->gfx.priv_reg_irq.num_types = 1;
6293         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6294
6295         adev->gfx.priv_inst_irq.num_types = 1;
6296         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6297 }
6298
6299 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6300 {
6301         switch (adev->asic_type) {
6302         case CHIP_TOPAZ:
6303         case CHIP_STONEY:
6304                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6305                 break;
6306         case CHIP_CARRIZO:
6307                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6308                 break;
6309         default:
6310                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6311                 break;
6312         }
6313 }
6314
6315 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6316 {
6317         /* init asci gds info */
6318         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6319         adev->gds.gws.total_size = 64;
6320         adev->gds.oa.total_size = 16;
6321
6322         if (adev->gds.mem.total_size == 64 * 1024) {
6323                 adev->gds.mem.gfx_partition_size = 4096;
6324                 adev->gds.mem.cs_partition_size = 4096;
6325
6326                 adev->gds.gws.gfx_partition_size = 4;
6327                 adev->gds.gws.cs_partition_size = 4;
6328
6329                 adev->gds.oa.gfx_partition_size = 4;
6330                 adev->gds.oa.cs_partition_size = 1;
6331         } else {
6332                 adev->gds.mem.gfx_partition_size = 1024;
6333                 adev->gds.mem.cs_partition_size = 1024;
6334
6335                 adev->gds.gws.gfx_partition_size = 16;
6336                 adev->gds.gws.cs_partition_size = 16;
6337
6338                 adev->gds.oa.gfx_partition_size = 4;
6339                 adev->gds.oa.cs_partition_size = 4;
6340         }
6341 }
6342
6343 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6344 {
6345         u32 data, mask;
6346
6347         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6348         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6349
6350         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6351         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6352
6353         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6354
6355         return (~data) & mask;
6356 }
6357
6358 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
6359                          struct amdgpu_cu_info *cu_info)
6360 {
6361         int i, j, k, counter, active_cu_number = 0;
6362         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6363
6364         if (!adev || !cu_info)
6365                 return -EINVAL;
6366
6367         memset(cu_info, 0, sizeof(*cu_info));
6368
6369         mutex_lock(&adev->grbm_idx_mutex);
6370         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6371                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6372                         mask = 1;
6373                         ao_bitmap = 0;
6374                         counter = 0;
6375                         gfx_v8_0_select_se_sh(adev, i, j);
6376                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6377                         cu_info->bitmap[i][j] = bitmap;
6378
6379                         for (k = 0; k < 16; k ++) {
6380                                 if (bitmap & mask) {
6381                                         if (counter < 2)
6382                                                 ao_bitmap |= mask;
6383                                         counter ++;
6384                                 }
6385                                 mask <<= 1;
6386                         }
6387                         active_cu_number += counter;
6388                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6389                 }
6390         }
6391         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6392         mutex_unlock(&adev->grbm_idx_mutex);
6393
6394         cu_info->number = active_cu_number;
6395         cu_info->ao_cu_mask = ao_cu_mask;
6396
6397         return 0;
6398 }