]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drm/amdgpu: refine gfx_v8 pg code.
[karo-tx-linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
84 enum {
85         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
86         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
87         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
88         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
89         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
90         BPM_REG_FGCG_MAX
91 };
92
93 #define RLC_FormatDirectRegListLength        14
94
/* Firmware images required by each supported VI-family ASIC (CE/PFP/ME/MEC
 * command processors plus the RLC).  Listing them via MODULE_FIRMWARE lets
 * userspace tooling pull the blobs into the initramfs.  Note Stoney and
 * Topaz carry no mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/* Per-VMID GDS register offsets: one {BASE, SIZE, GWS, OA} tuple for each of
 * the 16 VMIDs, used to program GDS partitioning per virtual memory context.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/* Tonga A11 golden register settings.  Flat triplets of
 * {register offset, mask, value} consumed by
 * amdgpu_program_register_sequence() at init time.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/* Tonga common golden settings (raster config, GB_ADDR_CONFIG, SPI CU
 * resource reservation).  {register, mask, value} triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/* Tonga MGCG/CGCG (medium-grain / coarse-grain clockgating) init sequence.
 * {register, mask, value} triplets; the GRBM_GFX_INDEX broadcast writes
 * (0xe0000000) make the subsequent per-block CGTT writes apply to all
 * SEs/SHs, and the per-CU CGTS entries configure static clockgating for
 * CU0..CU7.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/* Polaris11 A11 golden register settings.  {register, mask, value} triplets
 * for amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/* Polaris11 common golden settings (GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN; SPI CU resource reservation).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/* Polaris10 A11 golden register settings.  {register, mask, value} triplets
 * for amdgpu_program_register_sequence().  Unlike polaris11, no
 * TCP_CHAN_STEER_LO override is applied here.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/* Polaris10 common golden settings (raster config, GB_ADDR_CONFIG, SPI CU
 * resource reservation).
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/* Fiji common golden settings.  Note GRBM_GFX_INDEX is re-broadcast before
 * the final SPI_CONFIG_CNTL_1 write.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/* Fiji A10 golden register settings.  {register, mask, value} triplets for
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/* Fiji MGCG/CGCG clockgating init sequence.  Same structure as the Tonga
 * table but without per-CU CGTS entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/* Iceland (Topaz) A11 golden register settings.  {register, mask, value}
 * triplets for amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/* Iceland (Topaz) common golden settings (GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/* Iceland (Topaz) MGCG/CGCG clockgating init sequence.  Same structure as
 * the Tonga table, but with CP/CPC/CPF clock controls at 0xc0000100, per-CU
 * CGTS entries for CU0..CU5 only, and no trailing CP_MEM_SLP_CNTL entry.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/* Carrizo A11 golden register settings.  {register, mask, value} triplets
 * for amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/* Carrizo common golden settings (GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/* Carrizo MGCG/CGCG clockgating init sequence.  Same structure as the Tonga
 * table (per-CU CGTS entries for CU0..CU7), differing mainly in the CPF
 * clock control (0x00000100 here) and the final RLC_CGCG_CGLS_CTRL value
 * (0x0020003f).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/* Stoney A11 golden register settings.  {register, mask, value} triplets
 * for amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden registers: {register, AND mask, OR value} triplets
 * (raster config, GB_ADDR_CONFIG, SPI CU reservation) applied after the
 * per-board settings by gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney clockgating init sequence: {register, AND mask, OR value}
 * triplets programmed first by gfx_v8_0_init_golden_registers().
 * (Name suggests MGCG/CGCG defaults; register list is RLC/CP/CGTS
 * clockgating control.)
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660
661 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
662 {
663         switch (adev->asic_type) {
664         case CHIP_TOPAZ:
665                 amdgpu_program_register_sequence(adev,
666                                                  iceland_mgcg_cgcg_init,
667                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
668                 amdgpu_program_register_sequence(adev,
669                                                  golden_settings_iceland_a11,
670                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
671                 amdgpu_program_register_sequence(adev,
672                                                  iceland_golden_common_all,
673                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
674                 break;
675         case CHIP_FIJI:
676                 amdgpu_program_register_sequence(adev,
677                                                  fiji_mgcg_cgcg_init,
678                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
679                 amdgpu_program_register_sequence(adev,
680                                                  golden_settings_fiji_a10,
681                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
682                 amdgpu_program_register_sequence(adev,
683                                                  fiji_golden_common_all,
684                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
685                 break;
686
687         case CHIP_TONGA:
688                 amdgpu_program_register_sequence(adev,
689                                                  tonga_mgcg_cgcg_init,
690                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
691                 amdgpu_program_register_sequence(adev,
692                                                  golden_settings_tonga_a11,
693                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
694                 amdgpu_program_register_sequence(adev,
695                                                  tonga_golden_common_all,
696                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
697                 break;
698         case CHIP_POLARIS11:
699         case CHIP_POLARIS12:
700                 amdgpu_program_register_sequence(adev,
701                                                  golden_settings_polaris11_a11,
702                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
703                 amdgpu_program_register_sequence(adev,
704                                                  polaris11_golden_common_all,
705                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
706                 break;
707         case CHIP_POLARIS10:
708                 amdgpu_program_register_sequence(adev,
709                                                  golden_settings_polaris10_a11,
710                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
711                 amdgpu_program_register_sequence(adev,
712                                                  polaris10_golden_common_all,
713                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
714                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
715                 if (adev->pdev->revision == 0xc7 &&
716                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
717                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
718                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
719                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
720                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
721                 }
722                 break;
723         case CHIP_CARRIZO:
724                 amdgpu_program_register_sequence(adev,
725                                                  cz_mgcg_cgcg_init,
726                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
727                 amdgpu_program_register_sequence(adev,
728                                                  cz_golden_settings_a11,
729                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
730                 amdgpu_program_register_sequence(adev,
731                                                  cz_golden_common_all,
732                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
733                 break;
734         case CHIP_STONEY:
735                 amdgpu_program_register_sequence(adev,
736                                                  stoney_mgcg_cgcg_init,
737                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
738                 amdgpu_program_register_sequence(adev,
739                                                  stoney_golden_settings_a11,
740                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
741                 amdgpu_program_register_sequence(adev,
742                                                  stoney_golden_common_all,
743                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
744                 break;
745         default:
746                 break;
747         }
748 }
749
750 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
751 {
752         int i;
753
754         adev->gfx.scratch.num_reg = 7;
755         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
757                 adev->gfx.scratch.free[i] = true;
758                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
759         }
760 }
761
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
763 {
764         struct amdgpu_device *adev = ring->adev;
765         uint32_t scratch;
766         uint32_t tmp = 0;
767         unsigned i;
768         int r;
769
770         r = amdgpu_gfx_scratch_get(adev, &scratch);
771         if (r) {
772                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
773                 return r;
774         }
775         WREG32(scratch, 0xCAFEDEAD);
776         r = amdgpu_ring_alloc(ring, 3);
777         if (r) {
778                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
779                           ring->idx, r);
780                 amdgpu_gfx_scratch_free(adev, scratch);
781                 return r;
782         }
783         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
784         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
785         amdgpu_ring_write(ring, 0xDEADBEEF);
786         amdgpu_ring_commit(ring);
787
788         for (i = 0; i < adev->usec_timeout; i++) {
789                 tmp = RREG32(scratch);
790                 if (tmp == 0xDEADBEEF)
791                         break;
792                 DRM_UDELAY(1);
793         }
794         if (i < adev->usec_timeout) {
795                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
796                          ring->idx, i);
797         } else {
798                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
799                           ring->idx, scratch, tmp);
800                 r = -EINVAL;
801         }
802         amdgpu_gfx_scratch_free(adev, scratch);
803         return r;
804 }
805
/**
 * gfx_v8_0_ring_test_ib - indirect-buffer round-trip test on @ring
 * @ring: ring to submit the test IB on
 * @timeout: fence wait timeout passed to dma_fence_wait_timeout()
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a three-dword IB
 * (SET_UCONFIG_REG scratch = 0xDEADBEEF), waits for the fence and
 * verifies that the value landed.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals,
 * -EINVAL if the scratch value is wrong, or another negative error
 * code if setup fails.  Cleanup is ordered via the goto labels: err2
 * frees the IB and fence, err1 frees the scratch register.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* IB payload: write 0xDEADBEEF into the scratch register. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* dma_fence_wait_timeout() returns 0 on timeout. */
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	/* Fence signaled (r > 0): check that the CP actually wrote. */
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
861
862
863 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
864         release_firmware(adev->gfx.pfp_fw);
865         adev->gfx.pfp_fw = NULL;
866         release_firmware(adev->gfx.me_fw);
867         adev->gfx.me_fw = NULL;
868         release_firmware(adev->gfx.ce_fw);
869         adev->gfx.ce_fw = NULL;
870         release_firmware(adev->gfx.rlc_fw);
871         adev->gfx.rlc_fw = NULL;
872         release_firmware(adev->gfx.mec_fw);
873         adev->gfx.mec_fw = NULL;
874         if ((adev->asic_type != CHIP_STONEY) &&
875             (adev->asic_type != CHIP_TOPAZ))
876                 release_firmware(adev->gfx.mec2_fw);
877         adev->gfx.mec2_fw = NULL;
878
879         kfree(adev->gfx.rlc.register_list_format);
880 }
881
882 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
883 {
884         const char *chip_name;
885         char fw_name[30];
886         int err;
887         struct amdgpu_firmware_info *info = NULL;
888         const struct common_firmware_header *header = NULL;
889         const struct gfx_firmware_header_v1_0 *cp_hdr;
890         const struct rlc_firmware_header_v2_0 *rlc_hdr;
891         unsigned int *tmp = NULL, i;
892
893         DRM_DEBUG("\n");
894
895         switch (adev->asic_type) {
896         case CHIP_TOPAZ:
897                 chip_name = "topaz";
898                 break;
899         case CHIP_TONGA:
900                 chip_name = "tonga";
901                 break;
902         case CHIP_CARRIZO:
903                 chip_name = "carrizo";
904                 break;
905         case CHIP_FIJI:
906                 chip_name = "fiji";
907                 break;
908         case CHIP_POLARIS11:
909                 chip_name = "polaris11";
910                 break;
911         case CHIP_POLARIS10:
912                 chip_name = "polaris10";
913                 break;
914         case CHIP_POLARIS12:
915                 chip_name = "polaris12";
916                 break;
917         case CHIP_STONEY:
918                 chip_name = "stoney";
919                 break;
920         default:
921                 BUG();
922         }
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
925         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
932         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
936         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.me_fw);
940         if (err)
941                 goto out;
942         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
943         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945
946         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
947         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
954         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
958         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
959         if (err)
960                 goto out;
961         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
962         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
963         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
964         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
965
966         adev->gfx.rlc.save_and_restore_offset =
967                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
968         adev->gfx.rlc.clear_state_descriptor_offset =
969                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
970         adev->gfx.rlc.avail_scratch_ram_locations =
971                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
972         adev->gfx.rlc.reg_restore_list_size =
973                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
974         adev->gfx.rlc.reg_list_format_start =
975                         le32_to_cpu(rlc_hdr->reg_list_format_start);
976         adev->gfx.rlc.reg_list_format_separate_start =
977                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
978         adev->gfx.rlc.starting_offsets_start =
979                         le32_to_cpu(rlc_hdr->starting_offsets_start);
980         adev->gfx.rlc.reg_list_format_size_bytes =
981                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
982         adev->gfx.rlc.reg_list_size_bytes =
983                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
984
985         adev->gfx.rlc.register_list_format =
986                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
987                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
988
989         if (!adev->gfx.rlc.register_list_format) {
990                 err = -ENOMEM;
991                 goto out;
992         }
993
994         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
995                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
996         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
997                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
998
999         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1000
1001         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1005
1006         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1008         if (err)
1009                 goto out;
1010         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1011         if (err)
1012                 goto out;
1013         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1016
1017         if ((adev->asic_type != CHIP_STONEY) &&
1018             (adev->asic_type != CHIP_TOPAZ)) {
1019                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1021                 if (!err) {
1022                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1023                         if (err)
1024                                 goto out;
1025                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026                                 adev->gfx.mec2_fw->data;
1027                         adev->gfx.mec2_fw_version =
1028                                 le32_to_cpu(cp_hdr->header.ucode_version);
1029                         adev->gfx.mec2_feature_version =
1030                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1031                 } else {
1032                         err = 0;
1033                         adev->gfx.mec2_fw = NULL;
1034                 }
1035         }
1036
1037         if (adev->firmware.smu_load) {
1038                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040                 info->fw = adev->gfx.pfp_fw;
1041                 header = (const struct common_firmware_header *)info->fw->data;
1042                 adev->firmware.fw_size +=
1043                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1044
1045                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047                 info->fw = adev->gfx.me_fw;
1048                 header = (const struct common_firmware_header *)info->fw->data;
1049                 adev->firmware.fw_size +=
1050                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051
1052                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054                 info->fw = adev->gfx.ce_fw;
1055                 header = (const struct common_firmware_header *)info->fw->data;
1056                 adev->firmware.fw_size +=
1057                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058
1059                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061                 info->fw = adev->gfx.rlc_fw;
1062                 header = (const struct common_firmware_header *)info->fw->data;
1063                 adev->firmware.fw_size +=
1064                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065
1066                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068                 info->fw = adev->gfx.mec_fw;
1069                 header = (const struct common_firmware_header *)info->fw->data;
1070                 adev->firmware.fw_size +=
1071                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072
1073                 /* we need account JT in */
1074                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075                 adev->firmware.fw_size +=
1076                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1077
1078                 if (amdgpu_sriov_vf(adev)) {
1079                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081                         info->fw = adev->gfx.mec_fw;
1082                         adev->firmware.fw_size +=
1083                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1084                 }
1085
1086                 if (adev->gfx.mec2_fw) {
1087                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089                         info->fw = adev->gfx.mec2_fw;
1090                         header = (const struct common_firmware_header *)info->fw->data;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1093                 }
1094
1095         }
1096
1097 out:
1098         if (err) {
1099                 dev_err(adev->dev,
1100                         "gfx8: Failed to load firmware \"%s\"\n",
1101                         fw_name);
1102                 release_firmware(adev->gfx.pfp_fw);
1103                 adev->gfx.pfp_fw = NULL;
1104                 release_firmware(adev->gfx.me_fw);
1105                 adev->gfx.me_fw = NULL;
1106                 release_firmware(adev->gfx.ce_fw);
1107                 adev->gfx.ce_fw = NULL;
1108                 release_firmware(adev->gfx.rlc_fw);
1109                 adev->gfx.rlc_fw = NULL;
1110                 release_firmware(adev->gfx.mec_fw);
1111                 adev->gfx.mec_fw = NULL;
1112                 release_firmware(adev->gfx.mec2_fw);
1113                 adev->gfx.mec2_fw = NULL;
1114         }
1115         return err;
1116 }
1117
1118 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1119                                     volatile u32 *buffer)
1120 {
1121         u32 count = 0, i;
1122         const struct cs_section_def *sect = NULL;
1123         const struct cs_extent_def *ext = NULL;
1124
1125         if (adev->gfx.rlc.cs_data == NULL)
1126                 return;
1127         if (buffer == NULL)
1128                 return;
1129
1130         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1131         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1132
1133         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1134         buffer[count++] = cpu_to_le32(0x80000000);
1135         buffer[count++] = cpu_to_le32(0x80000000);
1136
1137         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1138                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1139                         if (sect->id == SECT_CONTEXT) {
1140                                 buffer[count++] =
1141                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1142                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1143                                                 PACKET3_SET_CONTEXT_REG_START);
1144                                 for (i = 0; i < ext->reg_count; i++)
1145                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1146                         } else {
1147                                 return;
1148                         }
1149                 }
1150         }
1151
1152         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1153         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1154                         PACKET3_SET_CONTEXT_REG_START);
1155         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1156         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1157
1158         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1159         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1160
1161         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1162         buffer[count++] = cpu_to_le32(0);
1163 }
1164
/**
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp table bo
 * @adev: amdgpu device pointer
 *
 * Concatenates the jump table of each CP microcode engine (me 0..3:
 * CE, PFP, ME, MEC; me 4: MEC2, Carrizo only) into the previously
 * mapped adev->gfx.rlc.cp_table_ptr buffer.  mec2_fw is only
 * dereferenced for Carrizo (max_me == 5), so it may be NULL elsewhere.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Only Carrizo carries a MEC2 image. */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* Every me in [0, max_me) is matched by exactly one branch
		 * below, so fw_data/table_offset/table_size are always set
		 * before the copy loop. */
		if (me == 0) {
			/* me 0: constant engine (CE) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			/* me 1: prefetch parser (PFP) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			/* me 2: micro engine (ME) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			/* me 3: compute engine (MEC) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			/* me 4: second compute engine (MEC2, Carrizo only) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* Copy this engine's table right after the previous one. */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1229
1230 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231 {
1232         int r;
1233
1234         /* clear state block */
1235         if (adev->gfx.rlc.clear_state_obj) {
1236                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237                 if (unlikely(r != 0))
1238                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242                 adev->gfx.rlc.clear_state_obj = NULL;
1243         }
1244
1245         /* jump table block */
1246         if (adev->gfx.rlc.cp_table_obj) {
1247                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248                 if (unlikely(r != 0))
1249                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253                 adev->gfx.rlc.cp_table_obj = NULL;
1254         }
1255 }
1256
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258 {
1259         volatile u32 *dst_ptr;
1260         u32 dws;
1261         const struct cs_section_def *cs_data;
1262         int r;
1263
1264         adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266         cs_data = adev->gfx.rlc.cs_data;
1267
1268         if (cs_data) {
1269                 /* clear state block */
1270                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1273                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274                                              AMDGPU_GEM_DOMAIN_VRAM,
1275                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1320                                              NULL, NULL,
1321                                              &adev->gfx.rlc.cp_table_obj);
1322                         if (r) {
1323                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1324                                 return r;
1325                         }
1326                 }
1327
1328                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329                 if (unlikely(r != 0)) {
1330                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1331                         return r;
1332                 }
1333                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334                                   &adev->gfx.rlc.cp_table_gpu_addr);
1335                 if (r) {
1336                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1338                         return r;
1339                 }
1340                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1341                 if (r) {
1342                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1343                         return r;
1344                 }
1345
1346                 cz_init_cp_jump_table(adev);
1347
1348                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1350         }
1351
1352         return 0;
1353 }
1354
1355 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1356 {
1357         int r;
1358
1359         if (adev->gfx.mec.hpd_eop_obj) {
1360                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361                 if (unlikely(r != 0))
1362                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1365                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366                 adev->gfx.mec.hpd_eop_obj = NULL;
1367         }
1368 }
1369
1370 #define MEC_HPD_SIZE 2048
1371
1372 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1373 {
1374         int r;
1375         u32 *hpd;
1376
1377         /*
1378          * we assign only 1 pipe because all other pipes will
1379          * be handled by KFD
1380          */
1381         adev->gfx.mec.num_mec = 1;
1382         adev->gfx.mec.num_pipe = 1;
1383         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1384
1385         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1386                 r = amdgpu_bo_create(adev,
1387                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1388                                      PAGE_SIZE, true,
1389                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1390                                      &adev->gfx.mec.hpd_eop_obj);
1391                 if (r) {
1392                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1393                         return r;
1394                 }
1395         }
1396
1397         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1398         if (unlikely(r != 0)) {
1399                 gfx_v8_0_mec_fini(adev);
1400                 return r;
1401         }
1402         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1403                           &adev->gfx.mec.hpd_eop_gpu_addr);
1404         if (r) {
1405                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1406                 gfx_v8_0_mec_fini(adev);
1407                 return r;
1408         }
1409         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1410         if (r) {
1411                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1412                 gfx_v8_0_mec_fini(adev);
1413                 return r;
1414         }
1415
1416         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1417
1418         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1419         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1420
1421         return 0;
1422 }
1423
/*
 * Pre-assembled GCN compute shader, as raw machine words, used by
 * gfx_v8_0_do_edc_gpr_workarounds() for the VGPR-init dispatch.  It is
 * copied verbatim into the workaround IB at vgpr_offset and launched with
 * the register state from vgpr_init_regs below; keep the two in sync.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1460
/*
 * Pre-assembled GCN compute shader (raw machine words) used by
 * gfx_v8_0_do_edc_gpr_workarounds() for both SGPR-init dispatches.  It is
 * copied into the workaround IB at sgpr_offset and launched twice, once
 * with sgpr1_init_regs and once with sgpr2_init_regs.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1485
/*
 * (register, value) pairs — consumed two entries at a time — written via
 * PACKET3_SET_SH_REG before the VGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Configures thread-group size and
 * user data for vgpr_init_compute_shader.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1505
/*
 * (register, value) pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs only
 * in COMPUTE_STATIC_THREAD_MGMT_SE0 (0x0f here vs 0xf0), presumably
 * targeting a different set of CUs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1525
/*
 * (register, value) pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); identical to sgpr1_init_regs except
 * for COMPUTE_STATIC_THREAD_MGMT_SE0 (0xf0 here vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1545
/*
 * EDC SEC/DED error-counter registers.  Read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters after the
 * workaround dispatches have run.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1574
/*
 * gfx_v8_0_do_edc_gpr_workarounds - run the Carrizo EDC GPR init workaround.
 *
 * Builds a single IB containing three compute dispatches (one VGPR-init,
 * two SGPR-init) that run the shaders above to initialize the GPR files,
 * submits it on the first compute ring and waits for completion, then
 * enables EDC DED/FED modes and reads back the SEC/DED counter registers
 * to clear them.  No-op on non-Carrizo parts or if the compute ring is
 * not ready.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save EDC mode and disable EDC while the workaround runs */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * IB size per dispatch section, in dwords:
	 * 3 per register pair (SET_SH_REG header + offset + value),
	 * + 4 for COMPUTE_PGM_LO/HI, + 5 for DISPATCH_DIRECT,
	 * + 2 for the CS partial flush EVENT_WRITE.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	/* the shader blobs live in the same IB, after the packets */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC: restore saved mode with DED_MODE/PROP_FED set */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1737
/*
 * gfx_v8_0_gpu_early_init - fill in the per-ASIC gfx configuration.
 *
 * Sets the static shader-engine/CU/backend limits and FIFO sizes for each
 * supported VI-family ASIC (Polaris parts read them from atombios), then
 * derives the memory row size from the MC configuration (DIMM address
 * mapping fuses on APUs, MC_ARB_RAMCFG otherwise) and patches ROW_SIZE
 * into the golden GB_ADDR_CONFIG value.  Returns 0 on success or a
 * negative error code from the atombios query.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris reads SE/pipe/CU topology from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* conservative fallback for unknown VI parts */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1995
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * Registers the GFX interrupt sources, loads microcode, initializes the
 * RLC and MEC buffer objects, brings up the gfx and compute rings, and
 * reserves the GDS/GWS/OA partitions.  Order matters: rings depend on the
 * EOP irq source being registered, and gpu_early_init runs last.
 *
 * @handle: opaque pointer, actually a struct amdgpu_device *
 *
 * Returns 0 on success or a negative error code on failure.  On error the
 * already-acquired resources are left for sw_fini to release.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (VI interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (source id 184) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (source id 185) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Fetch CP/RLC/MEC firmware images before any block that needs them. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		/* linear ring index i maps onto MEC1, 8 queues per pipe */
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* 32 KB of constant-engine RAM, presumably fixed for all VI parts
	 * handled here -- TODO confirm against register spec. */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2108
2109 static int gfx_v8_0_sw_fini(void *handle)
2110 {
2111         int i;
2112         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2113
2114         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2115         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2116         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2117
2118         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2119                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2120         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2122
2123         gfx_v8_0_mec_fini(adev);
2124         gfx_v8_0_rlc_fini(adev);
2125         gfx_v8_0_free_microcode(adev);
2126
2127         return 0;
2128 }
2129
2130 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2131 {
2132         uint32_t *modearray, *mod2array;
2133         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2134         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2135         u32 reg_offset;
2136
2137         modearray = adev->gfx.config.tile_mode_array;
2138         mod2array = adev->gfx.config.macrotile_mode_array;
2139
2140         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2141                 modearray[reg_offset] = 0;
2142
2143         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2144                 mod2array[reg_offset] = 0;
2145
2146         switch (adev->asic_type) {
2147         case CHIP_TOPAZ:
2148                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2));
2178                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2) |
2180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2206                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P2) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250
2251                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                 NUM_BANKS(ADDR_SURF_8_BANK));
2279                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                 NUM_BANKS(ADDR_SURF_16_BANK));
2283                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                 NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                  NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                  NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                  NUM_BANKS(ADDR_SURF_8_BANK));
2307
2308                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2309                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2310                             reg_offset != 23)
2311                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2312
2313                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2314                         if (reg_offset != 7)
2315                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2316
2317                 break;
2318         case CHIP_FIJI:
2319                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2349                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2352                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2353                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441
2442                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                 NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493                                  NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497                                  NUM_BANKS(ADDR_SURF_4_BANK));
2498
2499                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2501
2502                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2503                         if (reg_offset != 7)
2504                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2505
2506                 break;
	case CHIP_TONGA:
		/*
		 * Tonga tile mode table: programmed for the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16 pipe configuration (the PRT
		 * entries at indices 7, 12, 17, 23 and 30 fall back to
		 * ADDR_SURF_P4_16x16).
		 *
		 * modearray[0..30] -> GB_TILE_MODE0..30, grouped by the
		 * MICRO_TILE_MODE_NEW field:
		 *   0-7   depth micro tiling, increasing TILE_SPLIT
		 *   8     linear aligned
		 *   9-12  display micro tiling
		 *   13-17 thin micro tiling (color/texture)
		 *   18-26 thin/thick/xthick 3D variants
		 *   27-30 rotated micro tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[0..14] -> GB_MACROTILE_MODE0..14 (bank width/
		 * height, macro tile aspect, bank count).  Index 7 is
		 * deliberately never assigned and the write loop below
		 * skips it — NOTE(review): presumably a reserved entry;
		 * confirm against the GB_MACROTILE_MODE register spec.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the whole tile mode table to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile table, skipping the unprogrammed entry 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/12 tile mode table: 4-pipe configuration —
		 * every entry uses PIPE_CONFIG(ADDR_SURF_P4_16x16).
		 *
		 * modearray[0..30] -> GB_TILE_MODE0..30, grouped by the
		 * MICRO_TILE_MODE_NEW field:
		 *   0-7   depth micro tiling, increasing TILE_SPLIT
		 *   8     linear aligned
		 *   9-12  display micro tiling
		 *   13-17 thin micro tiling (color/texture)
		 *   18-26 thin/thick/xthick 3D variants
		 *   27-30 rotated micro tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * mod2array[0..14] -> GB_MACROTILE_MODE0..14.  Note the
		 * wider BANK_WIDTH_2 for entries 8 and 9 compared to the
		 * 8-pipe parts.  Index 7 is deliberately never assigned
		 * and the write loop below skips it — NOTE(review):
		 * presumably a reserved entry; confirm against the
		 * GB_MACROTILE_MODE register spec.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the whole tile mode table to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile table, skipping the unprogrammed entry 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2899         case CHIP_POLARIS10:
2900                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022
3023                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081                                 NUM_BANKS(ADDR_SURF_8_BANK));
3082
3083                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091                                 NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095
3096                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097                         if (reg_offset != 7)
3098                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099
3100                 break;
3101         case CHIP_STONEY:
3102                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2) |
3104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2) |
3128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2));
3132                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133                                 PIPE_CONFIG(ADDR_SURF_P2) |
3134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204
3205                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_8_BANK));
3209                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232                                 NUM_BANKS(ADDR_SURF_8_BANK));
3233                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_16_BANK));
3237                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                 NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                  NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256                                  NUM_BANKS(ADDR_SURF_16_BANK));
3257                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260                                  NUM_BANKS(ADDR_SURF_8_BANK));
3261
3262                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264                             reg_offset != 23)
3265                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266
3267                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268                         if (reg_offset != 7)
3269                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270
3271                 break;
3272         default:
3273                 dev_warn(adev->dev,
3274                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275                          adev->asic_type);
3276
3277         case CHIP_CARRIZO:
3278                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2) |
3304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307                                 PIPE_CONFIG(ADDR_SURF_P2));
3308                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377                                  PIPE_CONFIG(ADDR_SURF_P2) |
3378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380
3381                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                 NUM_BANKS(ADDR_SURF_8_BANK));
3409                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432                                  NUM_BANKS(ADDR_SURF_16_BANK));
3433                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436                                  NUM_BANKS(ADDR_SURF_8_BANK));
3437
3438                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440                             reg_offset != 23)
3441                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442
3443                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444                         if (reg_offset != 7)
3445                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446
3447                 break;
3448         }
3449 }
3450
3451 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452                                   u32 se_num, u32 sh_num, u32 instance)
3453 {
3454         u32 data;
3455
3456         if (instance == 0xffffffff)
3457                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458         else
3459                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460
3461         if (se_num == 0xffffffff)
3462                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463         else
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465
3466         if (sh_num == 0xffffffff)
3467                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468         else
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470
3471         WREG32(mmGRBM_GFX_INDEX, data);
3472 }
3473
3474 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3475 {
3476         return (u32)((1ULL << bit_width) - 1);
3477 }
3478
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481         u32 data, mask;
3482
3483         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489                                        adev->gfx.config.max_sh_per_se);
3490
3491         return (~data) & mask;
3492 }
3493
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497         switch (adev->asic_type) {
3498         case CHIP_FIJI:
3499                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500                           RB_XSEL2(1) | PKR_MAP(2) |
3501                           PKR_XSEL(1) | PKR_YSEL(1) |
3502                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504                            SE_PAIR_YSEL(2);
3505                 break;
3506         case CHIP_TONGA:
3507         case CHIP_POLARIS10:
3508                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509                           SE_XSEL(1) | SE_YSEL(1);
3510                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511                            SE_PAIR_YSEL(2);
3512                 break;
3513         case CHIP_TOPAZ:
3514         case CHIP_CARRIZO:
3515                 *rconf |= RB_MAP_PKR0(2);
3516                 *rconf1 |= 0x0;
3517                 break;
3518         case CHIP_POLARIS11:
3519         case CHIP_POLARIS12:
3520                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521                           SE_XSEL(1) | SE_YSEL(1);
3522                 *rconf1 |= 0x0;
3523                 break;
3524         case CHIP_STONEY:
3525                 *rconf |= 0x0;
3526                 *rconf1 |= 0x0;
3527                 break;
3528         default:
3529                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530                 break;
3531         }
3532 }
3533
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 *
 * @adev: amdgpu device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value for a fully populated chip
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of render backends that survived harvesting
 * @num_rb: number of RBs a fully populated chip would have
 *
 * When some RBs are harvested, the default raster mapping would route work
 * to dead backends.  Rewrite the SE/PKR/RB map fields so each shader engine
 * only references RBs present in @rb_mask, writing the adjusted config to
 * each SE in turn.  Caller must hold adev->grbm_idx_mutex (all callers in
 * this file do).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
                                        u32 raster_config, u32 raster_config_1,
                                        unsigned rb_mask, unsigned num_rb)
{
        unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
        unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
        unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
        unsigned rb_per_se = num_rb / num_se;
        unsigned se_mask[4];
        unsigned se;

        /* Slice the global RB bitmap into per-SE masks (up to 4 SEs). */
        se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
        se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
        se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
        se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

        WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
        WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
        WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

        /* If one whole SE pair is dead, remap SE_PAIR to the live pair. */
        if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
                             (!se_mask[2] && !se_mask[3]))) {
                raster_config_1 &= ~SE_PAIR_MAP_MASK;

                if (!se_mask[0] && !se_mask[1]) {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
                } else {
                        raster_config_1 |=
                                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
                }
        }

        for (se = 0; se < num_se; se++) {
                unsigned raster_config_se = raster_config;
                unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
                unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
                /* Index of this SE's pair partner base (SE pairs are 0/1, 2/3). */
                int idx = (se / 2) * 2;

                /* One SE of the pair is dead: remap SE_MAP to the live one. */
                if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
                        raster_config_se &= ~SE_MAP_MASK;

                        if (!se_mask[idx]) {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
                        } else {
                                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
                        }
                }

                /* Same logic one level down: remap PKR if a packer is empty. */
                pkr0_mask &= rb_mask;
                pkr1_mask &= rb_mask;
                if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
                        raster_config_se &= ~PKR_MAP_MASK;

                        if (!pkr0_mask) {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
                        } else {
                                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
                        }
                }

                /* And one level further: remap individual RBs within a packer. */
                if (rb_per_se >= 2) {
                        unsigned rb0_mask = 1 << (se * rb_per_se);
                        unsigned rb1_mask = rb0_mask << 1;

                        rb0_mask &= rb_mask;
                        rb1_mask &= rb_mask;
                        if (!rb0_mask || !rb1_mask) {
                                raster_config_se &= ~RB_MAP_PKR0_MASK;

                                if (!rb0_mask) {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
                                } else {
                                        raster_config_se |=
                                                RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
                                }
                        }

                        if (rb_per_se > 2) {
                                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
                                rb1_mask = rb0_mask << 1;
                                rb0_mask &= rb_mask;
                                rb1_mask &= rb_mask;
                                if (!rb0_mask || !rb1_mask) {
                                        raster_config_se &= ~RB_MAP_PKR1_MASK;

                                        if (!rb0_mask) {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
                                        } else {
                                                raster_config_se |=
                                                        RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
                                        }
                                }
                        }
                }

                /* GRBM_GFX_INDEX has a different offset on VI */
                gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }

        /* GRBM_GFX_INDEX has a different offset on VI */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3642
/*
 * gfx_v8_0_setup_rb - detect active render backends and program raster config
 *
 * Walks every SE/SH pair to build the global active-RB bitmap, stores it
 * (and the RB count) in adev->gfx.config, then programs the raster config
 * registers: broadcast when no harvesting is in effect, per-SE harvested
 * values otherwise.  Finally caches the per-SE/SH register values for
 * userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
        int i, j;
        u32 data;
        u32 raster_config = 0, raster_config_1 = 0;
        u32 active_rbs = 0;
        /* Number of bitmap bits contributed by each SH. */
        u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                                        adev->gfx.config.max_sh_per_se;
        unsigned num_rb_pipes;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        data = gfx_v8_0_get_rb_active_bitmap(adev);
                        /* Pack each SH's bitmap into its slot of the global mask. */
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
        /* Back to broadcast mode. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);

        num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
                             adev->gfx.config.max_shader_engines, 16);

        gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

        /*
         * Full RB complement (or nothing detected): broadcast the default
         * config.  Otherwise write per-SE configs avoiding harvested RBs.
         */
        if (!adev->gfx.config.backend_enable_mask ||
                        adev->gfx.config.num_rbs >= num_rb_pipes) {
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        } else {
                gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
                                                        adev->gfx.config.backend_enable_mask,
                                                        num_rb_pipes);
        }

        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].raster_config =
                                RREG32(mmPA_SC_RASTER_CONFIG);
                        adev->gfx.config.rb_config[i][j].raster_config_1 =
                                RREG32(mmPA_SC_RASTER_CONFIG_1);
                }
        }
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
}
3699
/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Program the SH_MEM_* registers for every VMID reserved for compute
 * (VMIDs 8..15) so their LDS/scratch/GPUVM apertures land in the
 * 64-bit HSA address layout described below.
 */
#define DEFAULT_SH_MEM_BASES    (0x6000)
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
        int i;
        uint32_t sh_mem_config;
        uint32_t sh_mem_bases;

        /*
         * Configure apertures:
         * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
         * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
         * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
         */
        /* Same base for the shared and private apertures (low/high halves). */
        sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

        /* 64-bit HSA addressing, unaligned access allowed, cache-coherent
         * default MTYPE, private memory goes through the ATC. */
        sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
                        SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
                        SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
                        MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
                        SH_MEM_CONFIG__PRIVATE_ATC_MASK;

        mutex_lock(&adev->srbm_mutex);
        for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
                vi_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(mmSH_MEM_CONFIG, sh_mem_config);
                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
                WREG32(mmSH_MEM_BASES, sh_mem_bases);
        }
        /* Restore VMID 0 selection. */
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
}
3744
3745 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3746 {
3747         u32 tmp;
3748         int i;
3749
3750         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3751         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3752         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3753         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3754
3755         gfx_v8_0_tiling_mode_table_init(adev);
3756         gfx_v8_0_setup_rb(adev);
3757         gfx_v8_0_get_cu_info(adev);
3758
3759         /* XXX SH_MEM regs */
3760         /* where to put LDS, scratch, GPUVM in FSA64 space */
3761         mutex_lock(&adev->srbm_mutex);
3762         for (i = 0; i < 16; i++) {
3763                 vi_srbm_select(adev, 0, 0, 0, i);
3764                 /* CP and shaders */
3765                 if (i == 0) {
3766                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3767                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3768                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3769                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3770                         WREG32(mmSH_MEM_CONFIG, tmp);
3771                 } else {
3772                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3773                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3774                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3775                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3776                         WREG32(mmSH_MEM_CONFIG, tmp);
3777                 }
3778
3779                 WREG32(mmSH_MEM_APE1_BASE, 1);
3780                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3781                 WREG32(mmSH_MEM_BASES, 0);
3782         }
3783         vi_srbm_select(adev, 0, 0, 0, 0);
3784         mutex_unlock(&adev->srbm_mutex);
3785
3786         gfx_v8_0_init_compute_vmid(adev);
3787
3788         mutex_lock(&adev->grbm_idx_mutex);
3789         /*
3790          * making sure that the following register writes will be broadcasted
3791          * to all the shaders
3792          */
3793         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794
3795         WREG32(mmPA_SC_FIFO_SIZE,
3796                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3797                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3798                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3799                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3800                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3801                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3802                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3803                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3804         mutex_unlock(&adev->grbm_idx_mutex);
3805
3806 }
3807
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to idle
 *
 * First polls the per-CU master busy status on every SE/SH (with the
 * GRBM index steered to that SE/SH), then polls the non-CU masters
 * (SE/GC/TC0/TC1).  Each poll gives up after adev->usec_timeout
 * microseconds without reporting an error.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
        u32 i, j, k;
        u32 mask;

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* Restore broadcast mode before dropping the lock. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
                RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
        for (k = 0; k < adev->usec_timeout; k++) {
                if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
3837
3838 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3839                                                bool enable)
3840 {
3841         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3842
3843         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3844         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3845         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3846         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3847
3848         WREG32(mmCP_INT_CNTL_RING0, tmp);
3849 }
3850
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer
 *
 * Programs the CSIB address (split hi/lo, low bits of the address must be
 * dword-aligned, hence the 0xfffffffc mask) and its size so the RLC can
 * replay the clear state.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
        /* csib */
        WREG32(mmRLC_CSIB_ADDR_HI,
                        adev->gfx.rlc.clear_state_gpu_addr >> 32);
        WREG32(mmRLC_CSIB_ADDR_LO,
                        adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
        WREG32(mmRLC_CSIB_LENGTH,
                        adev->gfx.rlc.clear_state_size);
}
3861
3862 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3863                                 int ind_offset,
3864                                 int list_size,
3865                                 int *unique_indices,
3866                                 int *indices_count,
3867                                 int max_indices,
3868                                 int *ind_start_offsets,
3869                                 int *offset_count,
3870                                 int max_offset)
3871 {
3872         int indices;
3873         bool new_entry = true;
3874
3875         for (; ind_offset < list_size; ind_offset++) {
3876
3877                 if (new_entry) {
3878                         new_entry = false;
3879                         ind_start_offsets[*offset_count] = ind_offset;
3880                         *offset_count = *offset_count + 1;
3881                         BUG_ON(*offset_count >= max_offset);
3882                 }
3883
3884                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3885                         new_entry = true;
3886                         continue;
3887                 }
3888
3889                 ind_offset += 2;
3890
3891                 /* look for the matching indice */
3892                 for (indices = 0;
3893                         indices < *indices_count;
3894                         indices++) {
3895                         if (unique_indices[indices] ==
3896                                 register_list_format[ind_offset])
3897                                 break;
3898                 }
3899
3900                 if (indices >= *indices_count) {
3901                         unique_indices[*indices_count] =
3902                                 register_list_format[ind_offset];
3903                         indices = *indices_count;
3904                         *indices_count = *indices_count + 1;
3905                         BUG_ON(*indices_count >= max_indices);
3906                 }
3907
3908                 register_list_format[ind_offset] = indices;
3909         }
3910 }
3911
3912 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3913 {
3914         int i, temp, data;
3915         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3916         int indices_count = 0;
3917         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3918         int offset_count = 0;
3919
3920         int list_size;
3921         unsigned int *register_list_format =
3922                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3923         if (!register_list_format)
3924                 return -ENOMEM;
3925         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3926                         adev->gfx.rlc.reg_list_format_size_bytes);
3927
3928         gfx_v8_0_parse_ind_reg_list(register_list_format,
3929                                 RLC_FormatDirectRegListLength,
3930                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3931                                 unique_indices,
3932                                 &indices_count,
3933                                 sizeof(unique_indices) / sizeof(int),
3934                                 indirect_start_offsets,
3935                                 &offset_count,
3936                                 sizeof(indirect_start_offsets)/sizeof(int));
3937
3938         /* save and restore list */
3939         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3940
3941         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3942         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3943                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3944
3945         /* indirect list */
3946         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3947         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3948                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3949
3950         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3951         list_size = list_size >> 1;
3952         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3953         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3954
3955         /* starting offsets starts */
3956         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3957                 adev->gfx.rlc.starting_offsets_start);
3958         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3959                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3960                                 indirect_start_offsets[i]);
3961
3962         /* unique indices */
3963         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3964         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3965         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3966                 if (unique_indices[i] != 0) {
3967                         amdgpu_mm_wreg(adev, temp + i,
3968                                         unique_indices[i] & 0x3FFFF, false);
3969                         amdgpu_mm_wreg(adev, data + i,
3970                                         unique_indices[i] >> 20, false);
3971                 }
3972         }
3973         kfree(register_list_format);
3974
3975         return 0;
3976 }
3977
/* Turn on the RLC save/restore machine (lists must be uploaded first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
        WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3982
/*
 * gfx_v8_0_init_power_gating - program RLC power-gating timing parameters
 *
 * Sets the WPTR poll interval and the RLC PG delay / auto-PG threshold
 * values.  The constants (0x60, 0x10, 0x3, 0x55f0) are tuning values
 * carried over from AMD's reference settings — presumably vendor-
 * validated; confirm against the PG programming guide before changing.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
        uint32_t data;

        WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

        /* Power up/down, command-propagate and memory-sleep delays. */
        data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
        data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
        WREG32(mmRLC_PG_DELAY, data);

        WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
        WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3999
4000 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4001                                                 bool enable)
4002 {
4003         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4004 }
4005
4006 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4007                                                   bool enable)
4008 {
4009         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4010 }
4011
4012 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4013 {
4014         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4015 }
4016
4017 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4018 {
4019         if ((adev->asic_type == CHIP_CARRIZO) ||
4020             (adev->asic_type == CHIP_STONEY)) {
4021                 gfx_v8_0_init_csb(adev);
4022                 gfx_v8_0_init_save_restore_list(adev);
4023                 gfx_v8_0_enable_save_restore_machine(adev);
4024                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4025                 gfx_v8_0_init_power_gating(adev);
4026                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4027         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4028                    (adev->asic_type == CHIP_POLARIS12)) {
4029                 gfx_v8_0_init_csb(adev);
4030                 gfx_v8_0_init_save_restore_list(adev);
4031                 gfx_v8_0_enable_save_restore_machine(adev);
4032                 gfx_v8_0_init_power_gating(adev);
4033         }
4034
4035 }
4036
/* Halt the RLC F32 core, mask GUI-idle interrupts (only meaningful while
 * the RLC runs) and wait for pending RLC serdes transactions to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4044
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET.  The udelay()s give
 * the hardware time to latch each edge of the reset signal.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4053
/* Start the RLC F32 core and, on discrete parts, re-enable GUI-idle
 * interrupts.  APUs enable them later, once the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* allow the core to come up before callers touch it */
	udelay(50);
}
4064
4065 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4066 {
4067         const struct rlc_firmware_header_v2_0 *hdr;
4068         const __le32 *fw_data;
4069         unsigned i, fw_size;
4070
4071         if (!adev->gfx.rlc_fw)
4072                 return -EINVAL;
4073
4074         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4075         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4076
4077         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4078                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4079         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4080
4081         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4082         for (i = 0; i < fw_size; i++)
4083                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4084         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4085
4086         return 0;
4087 }
4088
/* Bring the RLC back up: halt it, clear clock-gating and power-gating
 * state, reset the core, reprogram power gating, (re)load the RLC
 * microcode if the driver owns firmware loading, then start the core.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its low
		 * two enable bits as well */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4133
4134 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4135 {
4136         int i;
4137         u32 tmp = RREG32(mmCP_ME_CNTL);
4138
4139         if (enable) {
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4143         } else {
4144                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4145                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4146                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4147                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4148                         adev->gfx.gfx_ring[i].ready = false;
4149         }
4150         WREG32(mmCP_ME_CNTL, tmp);
4151         udelay(50);
4152 }
4153
4154 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4155 {
4156         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4157         const struct gfx_firmware_header_v1_0 *ce_hdr;
4158         const struct gfx_firmware_header_v1_0 *me_hdr;
4159         const __le32 *fw_data;
4160         unsigned i, fw_size;
4161
4162         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4163                 return -EINVAL;
4164
4165         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4166                 adev->gfx.pfp_fw->data;
4167         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4168                 adev->gfx.ce_fw->data;
4169         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4170                 adev->gfx.me_fw->data;
4171
4172         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4173         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4174         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4175
4176         gfx_v8_0_cp_gfx_enable(adev, false);
4177
4178         /* PFP */
4179         fw_data = (const __le32 *)
4180                 (adev->gfx.pfp_fw->data +
4181                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4182         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4183         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4184         for (i = 0; i < fw_size; i++)
4185                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4186         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4187
4188         /* CE */
4189         fw_data = (const __le32 *)
4190                 (adev->gfx.ce_fw->data +
4191                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4192         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4193         WREG32(mmCP_CE_UCODE_ADDR, 0);
4194         for (i = 0; i < fw_size; i++)
4195                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4196         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4197
4198         /* ME */
4199         fw_data = (const __le32 *)
4200                 (adev->gfx.me_fw->data +
4201                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4202         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4203         WREG32(mmCP_ME_RAM_WADDR, 0);
4204         for (i = 0; i < fw_size; i++)
4205                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4206         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4207
4208         return 0;
4209 }
4210
4211 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4212 {
4213         u32 count = 0;
4214         const struct cs_section_def *sect = NULL;
4215         const struct cs_extent_def *ext = NULL;
4216
4217         /* begin clear state */
4218         count += 2;
4219         /* context control state */
4220         count += 3;
4221
4222         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4223                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4224                         if (sect->id == SECT_CONTEXT)
4225                                 count += 2 + ext->reg_count;
4226                         else
4227                                 return 0;
4228                 }
4229         }
4230         /* pa_sc_raster_config/pa_sc_raster_config1 */
4231         count += 4;
4232         /* end clear state */
4233         count += 2;
4234         /* clear state */
4235         count += 2;
4236
4237         return count;
4238 }
4239
/* Initialize the gfx CP and emit the clear-state preamble on gfx ring 0:
 * golden context registers from vi_cs_data, per-ASIC raster configuration,
 * a CLEAR_STATE packet and the CE partition bases.  The dword count of
 * everything emitted here must stay in sync with gfx_v8_0_get_csb_size().
 * Returns 0 on success or the ring-allocation error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet emitted after the CSB */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every SECT_CONTEXT register extent from the golden CS data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC pa_sc_raster_config / pa_sc_raster_config1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* value depends on the number of render backends */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();	/* unhandled ASIC: raster config values unknown */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4332
/* Program the gfx ring buffer (CP_RB0): size, rptr/wptr write-back
 * addresses, base address and (on ASICs that have one) the gfx doorbell,
 * then emit the clear-state preamble and run a ring test.
 * Returns the ring-test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* NOTE(review): delay presumably lets the pointer writes settle
	 * before RB_RPTR_WR_ENA is cleared below — confirm against HW docs */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* RB base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the gfx doorbell range to ring 0's page */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4416
4417 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4418 {
4419         int i;
4420
4421         if (enable) {
4422                 WREG32(mmCP_MEC_CNTL, 0);
4423         } else {
4424                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4425                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4426                         adev->gfx.compute_ring[i].ready = false;
4427         }
4428         udelay(50);
4429 }
4430
4431 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4432 {
4433         const struct gfx_firmware_header_v1_0 *mec_hdr;
4434         const __le32 *fw_data;
4435         unsigned i, fw_size;
4436
4437         if (!adev->gfx.mec_fw)
4438                 return -EINVAL;
4439
4440         gfx_v8_0_cp_compute_enable(adev, false);
4441
4442         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4443         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4444
4445         fw_data = (const __le32 *)
4446                 (adev->gfx.mec_fw->data +
4447                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4448         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4449
4450         /* MEC1 */
4451         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4452         for (i = 0; i < fw_size; i++)
4453                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4454         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4455
4456         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4457         if (adev->gfx.mec2_fw) {
4458                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4459
4460                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4461                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4462
4463                 fw_data = (const __le32 *)
4464                         (adev->gfx.mec2_fw->data +
4465                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4466                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4467
4468                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4469                 for (i = 0; i < fw_size; i++)
4470                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4471                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4472         }
4473
4474         return 0;
4475 }
4476
4477 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4478 {
4479         int i, r;
4480
4481         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4482                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4483
4484                 if (ring->mqd_obj) {
4485                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4486                         if (unlikely(r != 0))
4487                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4488
4489                         amdgpu_bo_unpin(ring->mqd_obj);
4490                         amdgpu_bo_unreserve(ring->mqd_obj);
4491
4492                         amdgpu_bo_unref(&ring->mqd_obj);
4493                         ring->mqd_obj = NULL;
4494                 }
4495         }
4496 }
4497
/* Set up and activate every compute queue: allocate/pin/map each ring's
 * MQD buffer, program the HQD registers for that queue (under srbm_mutex
 * with the queue selected via vi_srbm_select), mirror the programmed
 * values into the MQD, activate the queue, then un-halt the MEC and run
 * a ring test on each compute ring.  Returns 0 on success or a negative
 * error code from BO setup.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily create the MQD BO the first time through */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all HQD register accesses below target the me/pipe/queue
		 * selected here, hence the mutex */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* EOP base is programmed in units of 256 bytes */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* read back what the HW actually latched into the MQD copy */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to go idle */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* these ASICs need the MEC doorbell aperture programmed */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		/* globally enable compute doorbells */
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4747
/*
 * gfx_v8_0_cp_resume() - bring the GFX and compute command processors up.
 *
 * When powerplay is not managing firmware, CP microcode is either loaded
 * directly over MMIO (legacy path) or the SMU manager is polled to confirm
 * it has finished loading each CP ucode image.  Afterwards the GFX ring and
 * the compute rings are resumed and the GUI idle interrupt is re-enabled.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
        int r;

        /* Keep the GUI idle interrupt masked while the CP comes up on
         * dGPU parts. */
        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

        if (!adev->pp_enabled) {
                if (!adev->firmware.smu_load) {
                        /* legacy firmware loading */
                        r = gfx_v8_0_cp_gfx_load_microcode(adev);
                        if (r)
                                return r;

                        r = gfx_v8_0_cp_compute_load_microcode(adev);
                        if (r)
                                return r;
                } else {
                        /* SMU-assisted loading: verify each CP ucode image
                         * has finished loading. */
                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_CE);
                        if (r)
                                return -EINVAL;

                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_PFP);
                        if (r)
                                return -EINVAL;

                        r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                        AMDGPU_UCODE_ID_CP_ME);
                        if (r)
                                return -EINVAL;

                        /* TOPAZ takes the direct MMIO path for the MEC
                         * image instead of the SMU check. */
                        if (adev->asic_type == CHIP_TOPAZ) {
                                r = gfx_v8_0_cp_compute_load_microcode(adev);
                                if (r)
                                        return r;
                        } else {
                                r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
                                                                                 AMDGPU_UCODE_ID_CP_MEC1);
                                if (r)
                                        return -EINVAL;
                        }
                }
        }

        r = gfx_v8_0_cp_gfx_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_cp_compute_resume(adev);
        if (r)
                return r;

        gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        return 0;
}
4806
/* Enable or disable both command processors (GFX first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4812
/*
 * gfx_v8_0_hw_init() - hw_init IP callback for the GFX block.
 *
 * Applies the golden register settings, performs the one-time GPU setup,
 * then brings up the RLC followed by the command processors.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        /* The CP depends on a running RLC, so resume the RLC first. */
        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
4829
/*
 * gfx_v8_0_hw_fini() - hw_fini IP callback for the GFX block.
 *
 * Drops the privileged register/instruction interrupt references, then
 * stops the CP and RLC and frees compute CP state.  Under SR-IOV only the
 * interrupt references are dropped, since the VF must not touch the
 * hardware teardown.  Finally GFX power gating is ungated so a subsequent
 * hw_init starts from a known state.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);
        gfx_v8_0_cp_compute_fini(adev);

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
4849
/* Suspend is a full hardware teardown; the handle is the device itself. */
static int gfx_v8_0_suspend(void *handle)
{
        return gfx_v8_0_hw_fini(handle);
}
4856
/* Resume re-runs the full hardware init; the handle is the device itself. */
static int gfx_v8_0_resume(void *handle)
{
        return gfx_v8_0_hw_init(handle);
}
4863
4864 static bool gfx_v8_0_is_idle(void *handle)
4865 {
4866         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4867
4868         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4869                 return false;
4870         else
4871                 return true;
4872 }
4873
4874 static int gfx_v8_0_wait_for_idle(void *handle)
4875 {
4876         unsigned i;
4877         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878
4879         for (i = 0; i < adev->usec_timeout; i++) {
4880                 if (gfx_v8_0_is_idle(handle))
4881                         return 0;
4882
4883                 udelay(1);
4884         }
4885         return -ETIMEDOUT;
4886 }
4887
/*
 * gfx_v8_0_check_soft_reset() - determine whether GFX needs a soft reset.
 *
 * Inspects the busy bits in GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and
 * accumulates the GRBM/SRBM soft-reset masks needed to recover.  The masks
 * are latched in adev->gfx.{grbm,srbm}_soft_reset for the subsequent
 * pre_soft_reset/soft_reset/post_soft_reset callbacks.
 *
 * Returns true when a soft reset is required, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy GFX pipeline stage requires a CP + GFX
         * reset (and a GRBM reset on the SRBM side). */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC busy -> RLC reset. */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* Any busy CP fetcher/compute/gfx micro-engine -> reset all three
         * CP sub-blocks plus GRBM. */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS: pending GRBM requests or a busy semaphore block. */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
4949
/*
 * gfx_v8_0_inactive_hqd() - drain an active compute hardware queue.
 *
 * Selects the ring's me/pipe/queue via SRBM; if the HQD is active, writes
 * DEQUEUE_REQ = 2 into CP_HQD_DEQUEUE_REQUEST and polls CP_HQD_ACTIVE for
 * up to adev->usec_timeout microseconds until the queue deactivates.
 * NOTE(review): srbm_mutex locking and restoring the SRBM selection appear
 * to be the caller's responsibility — confirm at the call sites.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
                                  struct amdgpu_ring *ring)
{
        int i;

        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
                u32 tmp;
                tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
                tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
                                    DEQUEUE_REQ, 2);
                WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
                /* wait for the queue to report inactive */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                                break;
                        udelay(1);
                }
        }
}
4969
4970 static int gfx_v8_0_pre_soft_reset(void *handle)
4971 {
4972         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4973         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4974
4975         if ((!adev->gfx.grbm_soft_reset) &&
4976             (!adev->gfx.srbm_soft_reset))
4977                 return 0;
4978
4979         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4980         srbm_soft_reset = adev->gfx.srbm_soft_reset;
4981
4982         /* stop the rlc */
4983         gfx_v8_0_rlc_stop(adev);
4984
4985         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4986             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4987                 /* Disable GFX parsing/prefetching */
4988                 gfx_v8_0_cp_gfx_enable(adev, false);
4989
4990         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4991             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4992             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4993             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4994                 int i;
4995
4996                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4997                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4998
4999                         gfx_v8_0_inactive_hqd(adev, ring);
5000                 }
5001                 /* Disable MEC parsing/prefetching */
5002                 gfx_v8_0_cp_compute_enable(adev, false);
5003         }
5004
5005        return 0;
5006 }
5007
/*
 * gfx_v8_0_soft_reset() - pulse the latched GRBM/SRBM soft-reset bits.
 *
 * Uses the masks latched by gfx_v8_0_check_soft_reset().  GFX traffic is
 * stalled via GMCON_DEBUG (GFX_STALL/GFX_CLEAR) around the reset pulses;
 * each soft-reset register is read back after writing (write posting)
 * before the 50 us settle delays.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX traffic before asserting reset */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert, hold ~50us, then deassert the GRBM reset bits */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same assert/deassert dance on the SRBM side */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the GFX stall */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5069
/*
 * gfx_v8_0_init_hqd() - clear a compute HQD's queue state after reset.
 *
 * Selects the ring's me/pipe/queue, zeroes any pending dequeue request and
 * the PQ read/write pointers, then restores the default SRBM selection.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
                              struct amdgpu_ring *ring)
{
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);
        vi_srbm_select(adev, 0, 0, 0, 0);
}
5079
5080 static int gfx_v8_0_post_soft_reset(void *handle)
5081 {
5082         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5083         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5084
5085         if ((!adev->gfx.grbm_soft_reset) &&
5086             (!adev->gfx.srbm_soft_reset))
5087                 return 0;
5088
5089         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5090         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5091
5092         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5093             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5094                 gfx_v8_0_cp_gfx_resume(adev);
5095
5096         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5097             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5098             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5099             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5100                 int i;
5101
5102                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5103                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5104
5105                         gfx_v8_0_init_hqd(adev, ring);
5106                 }
5107                 gfx_v8_0_cp_compute_resume(adev);
5108         }
5109         gfx_v8_0_rlc_start(adev);
5110
5111         return 0;
5112 }
5113
5114 /**
5115  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5116  *
5117  * @adev: amdgpu_device pointer
5118  *
5119  * Fetches a GPU clock counter snapshot.
5120  * Returns the 64 bit clock counter snapshot.
5121  */
5122 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5123 {
5124         uint64_t clock;
5125
5126         mutex_lock(&adev->gfx.gpu_clock_mutex);
5127         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5128         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5129                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5130         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5131         return clock;
5132 }
5133
/*
 * gfx_v8_0_ring_emit_gds_switch() - emit the per-VMID GDS setup packets.
 *
 * Converts the byte-granular base/size arguments to hardware granularity
 * (AMDGPU_GDS/GWS/OA_SHIFT) and emits one WRITE_DATA packet per register
 * to program the VMID's GDS memory base and size, GWS allocation, and OA
 * allocation mask.  The OA value is a contiguous bit mask covering
 * [oa_base, oa_base + oa_size).
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5181
5182 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5183 {
5184         WREG32(mmSQ_IND_INDEX,
5185                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5186                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5187                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5188                 (SQ_IND_INDEX__FORCE_READ_MASK));
5189         return RREG32(mmSQ_IND_DATA);
5190 }
5191
5192 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5193                            uint32_t wave, uint32_t thread,
5194                            uint32_t regno, uint32_t num, uint32_t *out)
5195 {
5196         WREG32(mmSQ_IND_INDEX,
5197                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5198                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5199                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5200                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5201                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5202                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5203         while (num--)
5204                 *(out++) = RREG32(mmSQ_IND_DATA);
5205 }
5206
5207 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5208 {
5209         /* type 0 wave data */
5210         dst[(*no_fields)++] = 0;
5211         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5212         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5213         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5214         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5215         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5216         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5217         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5218         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5219         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5220         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5221         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5222         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5223         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5224         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5225         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5226         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5227         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5228         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5229 }
5230
/* Read @size consecutive SGPRs of the given wave, starting at @start,
 * into @dst.  Thread id 0 is passed since SGPRs are shared by the wave. */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
                                     uint32_t wave, uint32_t start,
                                     uint32_t size, uint32_t *dst)
{
        wave_read_regs(
                adev, simd, wave, 0,
                start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5239
5240
/* GFX IP helper callbacks used by core amdgpu code: GPU clock sampling,
 * SE/SH selection, and wave/SGPR debug register access. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5247
/*
 * gfx_v8_0_early_init() - early IP callback.
 *
 * Sets the GFX/compute ring counts and installs the gfx, ring, irq, gds
 * and rlc function tables before any hardware access.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5262
/*
 * gfx_v8_0_late_init() - late IP callback.
 *
 * Takes references on the privileged register/instruction fault
 * interrupts, runs the EDC GPR workaround (which submits IBs, hence late
 * init after the IB pool exists) and finally gates GFX power.
 * NOTE(review): the irq references taken here are not released on the
 * error paths below — confirm whether that is acceptable.
 */
static int gfx_v8_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;

        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
                return r;

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

        return 0;
}
5286
/*
 * Enable/disable static per-CU medium-grain power gating.  On
 * Polaris11/12 the SMU is notified via powerplay before the RLC_PG_CNTL
 * field is updated.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
                                                       bool enable)
{
        if ((adev->asic_type == CHIP_POLARIS11) ||
            (adev->asic_type == CHIP_POLARIS12))
                /* Send msg to SMU via Powerplay */
                amdgpu_set_powergating_state(adev,
                                             AMD_IP_BLOCK_TYPE_SMC,
                                             enable ?
                                             AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

        WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5300
5301 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5302                                                         bool enable)
5303 {
5304         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5305 }
5306
5307 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5308                 bool enable)
5309 {
5310         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5311 }
5312
5313 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5314                                           bool enable)
5315 {
5316         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5317 }
5318
5319 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5320                                                 bool enable)
5321 {
5322         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5323
5324         /* Read any GFX register to wake up GFX. */
5325         if (!enable)
5326                 RREG32(mmDB_RENDER_CONTROL);
5327 }
5328
5329 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5330                                           bool enable)
5331 {
5332         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5333                 cz_enable_gfx_cg_power_gating(adev, true);
5334                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5335                         cz_enable_gfx_pipeline_power_gating(adev, true);
5336         } else {
5337                 cz_enable_gfx_cg_power_gating(adev, false);
5338                 cz_enable_gfx_pipeline_power_gating(adev, false);
5339         }
5340 }
5341
5342 static int gfx_v8_0_set_powergating_state(void *handle,
5343                                           enum amd_powergating_state state)
5344 {
5345         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5346         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5347
5348         switch (adev->asic_type) {
5349         case CHIP_CARRIZO:
5350         case CHIP_STONEY:
5351
5352                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5353                         cz_enable_sck_slow_down_on_power_up(adev, true);
5354                         cz_enable_sck_slow_down_on_power_down(adev, true);
5355                 } else {
5356                         cz_enable_sck_slow_down_on_power_up(adev, false);
5357                         cz_enable_sck_slow_down_on_power_down(adev, false);
5358                 }
5359                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5360                         cz_enable_cp_power_gating(adev, true);
5361                 else
5362                         cz_enable_cp_power_gating(adev, false);
5363
5364                 cz_update_gfx_cg_power_gating(adev, enable);
5365
5366                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5367                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5368                 else
5369                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5370
5371                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5372                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5373                 else
5374                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5375                 break;
5376         case CHIP_POLARIS11:
5377         case CHIP_POLARIS12:
5378                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5379                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5380                 else
5381                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5382
5383                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5384                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5385                 else
5386                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5387
5388                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5389                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5390                 else
5391                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5392                 break;
5393         default:
5394                 break;
5395         }
5396
5397         return 0;
5398 }
5399
/*
 * gfx_v8_0_send_serdes_cmd() - broadcast a BPM command over the RLC serdes.
 *
 * Targets all SEs/SHs and both the CU and non-CU serdes masters, then
 * programs RLC_SERDES_WR_CTRL with the command and register address.
 * The Stoney path clears a smaller set of fields (it leaves BPM_DATA and
 * REG_ADDR out of the cleared mask) before OR-ing the new values in —
 * NOTE(review): presumably due to a different register layout; confirm
 * against the Stoney register spec.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5440
/* RLC safe-mode handshake: message codes written into RLC_GPR_REG2 and
 * the REQ/MESSAGE field layout of that register, defined locally here. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5447
/* Ask the RLC firmware to enter safe mode (Carrizo/Stoney path).
 *
 * Only meaningful when the RLC F32 core is running and some GFX
 * clock- or power-gating feature is enabled; otherwise this is a no-op.
 * The request is posted through RLC_GPR_REG2 and then we poll until
 * both GFX clock and power report "on" and the RLC has acked the
 * request by clearing REQ.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC F32 core is not enabled */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the ENTER message with the REQ bit set */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait (up to usec_timeout us) for GFX clock and power on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5483
/* Ask the RLC firmware to leave safe mode (Carrizo/Stoney path).
 *
 * Mirror of cz_enter_rlc_safe_mode(): posts the EXIT message through
 * RLC_GPR_REG2 and polls for the REQ bit to clear.  The final poll
 * loop runs even when no CG/PG feature was enabled, which is harmless
 * since REQ will already be clear in that case.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC F32 core is not enabled */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the EXIT message with the REQ bit set */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack by clearing REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5509
/* Ask the RLC firmware to enter safe mode (Iceland/Tonga-style path).
 *
 * Same handshake idea as the CZ variant, but the request goes through
 * the dedicated RLC_SAFE_MODE register: CMD latches the request and
 * MESSAGE = 1 means "enter".  Only done when the RLC F32 core runs and
 * CGCG/MGCG is enabled.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC F32 core is not enabled */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* request safe mode: CMD set, MESSAGE = 1 (enter) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait (up to usec_timeout us) for GFX clock and power on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5543
/* Ask the RLC firmware to leave safe mode (Iceland/Tonga-style path).
 *
 * Writes RLC_SAFE_MODE with CMD set and MESSAGE cleared (0 = exit),
 * then polls for the RLC to ack by clearing CMD.
 *
 * NOTE(review): "data" holds the value read from mmRLC_CNTL but is
 * then or-ed with RLC_SAFE_MODE masks and written to mmRLC_SAFE_MODE;
 * presumably only the CMD/MESSAGE fields matter to the RLC here —
 * confirm against the register spec before changing this.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC F32 core is not enabled */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* request exit: CMD set, MESSAGE = 0 (exit) */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5568
/* No-op safe-mode entry for ASICs that need no RLC handshake;
 * only tracks the bookkeeping flag. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5573
/* No-op safe-mode exit; counterpart of the nop enter above. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5578
/* RLC safe-mode ops for Carrizo/Stoney (RLC_GPR_REG2 handshake). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5583
/* RLC safe-mode ops for Iceland-style ASICs (RLC_SAFE_MODE handshake). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5588
/* RLC safe-mode ops for ASICs that need no handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5593
/* Enable or disable medium-grain clock gating (MGCG) and the related
 * light-sleep features (RLC/CP MGLS, CGTS tree-shade gating).
 *
 * The numbered steps follow the required hardware programming
 * sequence; do not reorder them.  The whole sequence runs with the
 * RLC in safe mode.  Registers are only rewritten when the computed
 * value actually differs from what was read back.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override is cleared only when both MGLS and CGTS_LS are on */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* disable path: set all overrides, then turn off MGLS */
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5697
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).
 *
 * The sequence (override bits, serdes commands, RLC_CGCG_CGLS_CTRL
 * enable bits, GUI idle interrupt) is order-sensitive hardware
 * programming; do not reorder.  Runs with the RLC in safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also clear the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5788 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5789                                             bool enable)
5790 {
5791         if (enable) {
5792                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5793                  * ===  MGCG + MGLS + TS(CG/LS) ===
5794                  */
5795                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5796                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5797         } else {
5798                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5799                  * ===  CGCG + CGLS ===
5800                  */
5801                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5802                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5803         }
5804         return 0;
5805 }
5806
5807 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5808                                           enum amd_clockgating_state state)
5809 {
5810         uint32_t msg_id, pp_state = 0;
5811         uint32_t pp_support_state = 0;
5812         void *pp_handle = adev->powerplay.pp_handle;
5813
5814         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5815                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5816                         pp_support_state = PP_STATE_SUPPORT_LS;
5817                         pp_state = PP_STATE_LS;
5818                 }
5819                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5820                         pp_support_state |= PP_STATE_SUPPORT_CG;
5821                         pp_state |= PP_STATE_CG;
5822                 }
5823                 if (state == AMD_CG_STATE_UNGATE)
5824                         pp_state = 0;
5825
5826                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5827                                 PP_BLOCK_GFX_CG,
5828                                 pp_support_state,
5829                                 pp_state);
5830                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5831         }
5832
5833         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5834                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5835                         pp_support_state = PP_STATE_SUPPORT_LS;
5836                         pp_state = PP_STATE_LS;
5837                 }
5838
5839                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5840                         pp_support_state |= PP_STATE_SUPPORT_CG;
5841                         pp_state |= PP_STATE_CG;
5842                 }
5843
5844                 if (state == AMD_CG_STATE_UNGATE)
5845                         pp_state = 0;
5846
5847                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5848                                 PP_BLOCK_GFX_MG,
5849                                 pp_support_state,
5850                                 pp_state);
5851                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5852         }
5853
5854         return 0;
5855 }
5856
5857 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5858                                           enum amd_clockgating_state state)
5859 {
5860
5861         uint32_t msg_id, pp_state = 0;
5862         uint32_t pp_support_state = 0;
5863         void *pp_handle = adev->powerplay.pp_handle;
5864
5865         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5866                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5867                         pp_support_state = PP_STATE_SUPPORT_LS;
5868                         pp_state = PP_STATE_LS;
5869                 }
5870                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5871                         pp_support_state |= PP_STATE_SUPPORT_CG;
5872                         pp_state |= PP_STATE_CG;
5873                 }
5874                 if (state == AMD_CG_STATE_UNGATE)
5875                         pp_state = 0;
5876
5877                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878                                 PP_BLOCK_GFX_CG,
5879                                 pp_support_state,
5880                                 pp_state);
5881                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5882         }
5883
5884         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5885                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5886                         pp_support_state = PP_STATE_SUPPORT_LS;
5887                         pp_state = PP_STATE_LS;
5888                 }
5889                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5890                         pp_support_state |= PP_STATE_SUPPORT_CG;
5891                         pp_state |= PP_STATE_CG;
5892                 }
5893                 if (state == AMD_CG_STATE_UNGATE)
5894                         pp_state = 0;
5895
5896                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5897                                 PP_BLOCK_GFX_3D,
5898                                 pp_support_state,
5899                                 pp_state);
5900                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5901         }
5902
5903         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5904                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5905                         pp_support_state = PP_STATE_SUPPORT_LS;
5906                         pp_state = PP_STATE_LS;
5907                 }
5908
5909                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5910                         pp_support_state |= PP_STATE_SUPPORT_CG;
5911                         pp_state |= PP_STATE_CG;
5912                 }
5913
5914                 if (state == AMD_CG_STATE_UNGATE)
5915                         pp_state = 0;
5916
5917                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5918                                 PP_BLOCK_GFX_MG,
5919                                 pp_support_state,
5920                                 pp_state);
5921                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5922         }
5923
5924         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5925                 pp_support_state = PP_STATE_SUPPORT_LS;
5926
5927                 if (state == AMD_CG_STATE_UNGATE)
5928                         pp_state = 0;
5929                 else
5930                         pp_state = PP_STATE_LS;
5931
5932                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5933                                 PP_BLOCK_GFX_RLC,
5934                                 pp_support_state,
5935                                 pp_state);
5936                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5937         }
5938
5939         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5940                 pp_support_state = PP_STATE_SUPPORT_LS;
5941
5942                 if (state == AMD_CG_STATE_UNGATE)
5943                         pp_state = 0;
5944                 else
5945                         pp_state = PP_STATE_LS;
5946                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947                         PP_BLOCK_GFX_CP,
5948                         pp_support_state,
5949                         pp_state);
5950                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5951         }
5952
5953         return 0;
5954 }
5955
5956 static int gfx_v8_0_set_clockgating_state(void *handle,
5957                                           enum amd_clockgating_state state)
5958 {
5959         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5960
5961         switch (adev->asic_type) {
5962         case CHIP_FIJI:
5963         case CHIP_CARRIZO:
5964         case CHIP_STONEY:
5965                 gfx_v8_0_update_gfx_clock_gating(adev,
5966                                                  state == AMD_CG_STATE_GATE ? true : false);
5967                 break;
5968         case CHIP_TONGA:
5969                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5970                 break;
5971         case CHIP_POLARIS10:
5972         case CHIP_POLARIS11:
5973                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5974                 break;
5975         default:
5976                 break;
5977         }
5978         return 0;
5979 }
5980
5981 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5982 {
5983         return ring->adev->wb.wb[ring->rptr_offs];
5984 }
5985
5986 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5987 {
5988         struct amdgpu_device *adev = ring->adev;
5989
5990         if (ring->use_doorbell)
5991                 /* XXX check if swapping is necessary on BE */
5992                 return ring->adev->wb.wb[ring->wptr_offs];
5993         else
5994                 return RREG32(mmCP_RB0_WPTR);
5995 }
5996
/* Publish the GFX ring's write pointer to the hardware: via the
 * doorbell (after updating the writeback slot) or via CP_RB0_WPTR. */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the posted register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
6010
/* Emit an HDP flush on the ring using a WAIT_REG_MEM packet:
 * write GPU_HDP_FLUSH_REQ and wait until GPU_HDP_FLUSH_DONE matches
 * the per-pipe ref/mask.  Compute rings pick a CPn done-bit based on
 * their ME/pipe; GFX rings use CP0 and wait on the PFP engine.
 * Bails out silently for an unexpected compute ME index. */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6042
/* Emit a VGT flush: a VS partial flush event followed by the
 * VGT_FLUSH event itself. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6053
6054
/* Invalidate the HDP cache by writing 1 to HDP_DEBUG0 through a
 * WRITE_DATA packet (ME engine, register destination, write-confirm). */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6066
/* Emit an indirect buffer on a GFX ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode, DE IBs the plain INDIRECT_BUFFER one.
 * The control dword packs the IB length and the VMID (bits 24+).
 * ctx_switch is unused here. */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* byte-swap control bits for BE hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6089
/* Emit an indirect buffer on a compute ring.  Same packet layout as
 * the GFX variant but always the plain INDIRECT_BUFFER opcode with
 * the VALID bit set in the control dword.  ctx_switch is unused. */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* byte-swap control bits for BE hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6105
/* Emit a fence on a GFX ring via EVENT_WRITE_EOP: flushes/writes back
 * the TC and TCL1 caches, writes the 32- or 64-bit seq value to addr
 * (DATA_SEL), and optionally raises an interrupt (INT_SEL), per the
 * AMDGPU_FENCE_FLAG_* bits in flags. */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6126
/* Emit a pipeline sync: WAIT_REG_MEM on the ring's own fence memory
 * until it equals the last emitted sync_seq.  GFX rings wait on the
 * PFP engine, compute rings on the ME.
 * (The `& 0xffffffff` on the upper address half is a no-op mask kept
 * as-is from the original code.) */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6143
/* Emit a VM TLB flush for vm_id on the ring:
 * 1) write the new page-directory base into the per-VMID context
 *    register (VMIDs 0-7 and 8-15 live in two separate register banks);
 * 2) write the vm_id bit into VM_INVALIDATE_REQUEST;
 * 3) WAIT_REG_MEM until the invalidate request register reads back 0;
 * 4) on GFX rings, sync PFP to ME and pad with NOPs so the CE cannot
 *    touch the VM before the flush completes. */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	/* page directory base is stored as a page frame number */
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6192
6193 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6194 {
6195         return ring->adev->wb.wb[ring->wptr_offs];
6196 }
6197
6198 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6199 {
6200         struct amdgpu_device *adev = ring->adev;
6201
6202         /* XXX check if swapping is necessary on BE */
6203         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6204         WDOORBELL32(ring->doorbell_index, ring->wptr);
6205 }
6206
/* Emit a fence on a compute ring via RELEASE_MEM: flush/invalidate
 * caches, write the sequence number @seq to memory at @addr and
 * optionally raise an interrupt.
 *
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit seq write
 *         (otherwise only the low 32 bits are consumed by the CP);
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit data, 1 = 32-bit; INT_SEL: 2 = interrupt, 0 = none */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* low bits must be dword-aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6227
/* Emit a SWITCH_BUFFER packet (with its single zero payload dword),
 * flipping the CE/DE buffer set for the next frame's state.
 */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6233
6234 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6235 {
6236         uint32_t dw2 = 0;
6237
6238         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6239         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6240                 gfx_v8_0_ring_emit_vgt_flush(ring);
6241                 /* set load_global_config & load_global_uconfig */
6242                 dw2 |= 0x8001;
6243                 /* set load_cs_sh_regs */
6244                 dw2 |= 0x01000000;
6245                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6246                 dw2 |= 0x10002;
6247
6248                 /* set load_ce_ram if preamble presented */
6249                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6250                         dw2 |= 0x10000000;
6251         } else {
6252                 /* still load_ce_ram if this is the first time preamble presented
6253                  * although there is no context switch happens.
6254                  */
6255                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6256                         dw2 |= 0x10000000;
6257         }
6258
6259         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6260         amdgpu_ring_write(ring, dw2);
6261         amdgpu_ring_write(ring, 0);
6262 }
6263
/* Enable/disable the EOP timestamp interrupt for the GFX ring by
 * toggling TIME_STAMP_INT_ENABLE in CP_INT_CNTL_RING0.
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6270
6271 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6272                                                      int me, int pipe,
6273                                                      enum amdgpu_interrupt_state state)
6274 {
6275         /*
6276          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6277          * handles the setting of interrupts for this specific pipe. All other
6278          * pipes' interrupts are set by amdkfd.
6279          */
6280
6281         if (me == 1) {
6282                 switch (pipe) {
6283                 case 0:
6284                         break;
6285                 default:
6286                         DRM_DEBUG("invalid pipe %d\n", pipe);
6287                         return;
6288                 }
6289         } else {
6290                 DRM_DEBUG("invalid me %d\n", me);
6291                 return;
6292         }
6293
6294         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6295                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6296 }
6297
/* .set callback for the privileged-register fault interrupt source:
 * toggle PRIV_REG_INT_ENABLE in CP_INT_CNTL_RING0. Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6308
/* .set callback for the privileged-instruction fault interrupt source:
 * toggle PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0. Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6319
6320 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6321                                             struct amdgpu_irq_src *src,
6322                                             unsigned type,
6323                                             enum amdgpu_interrupt_state state)
6324 {
6325         switch (type) {
6326         case AMDGPU_CP_IRQ_GFX_EOP:
6327                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6328                 break;
6329         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6330                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6331                 break;
6332         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6333                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6334                 break;
6335         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6336                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6337                 break;
6338         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6339                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6340                 break;
6341         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6342                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6343                 break;
6344         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6345                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6346                 break;
6347         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6348                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6349                 break;
6350         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6351                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6352                 break;
6353         default:
6354                 break;
6355         }
6356         return 0;
6357 }
6358
6359 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6360                             struct amdgpu_irq_src *source,
6361                             struct amdgpu_iv_entry *entry)
6362 {
6363         int i;
6364         u8 me_id, pipe_id, queue_id;
6365         struct amdgpu_ring *ring;
6366
6367         DRM_DEBUG("IH: CP EOP\n");
6368         me_id = (entry->ring_id & 0x0c) >> 2;
6369         pipe_id = (entry->ring_id & 0x03) >> 0;
6370         queue_id = (entry->ring_id & 0x70) >> 4;
6371
6372         switch (me_id) {
6373         case 0:
6374                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6375                 break;
6376         case 1:
6377         case 2:
6378                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6379                         ring = &adev->gfx.compute_ring[i];
6380                         /* Per-queue interrupt is supported for MEC starting from VI.
6381                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6382                           */
6383                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6384                                 amdgpu_fence_process(ring);
6385                 }
6386                 break;
6387         }
6388         return 0;
6389 }
6390
/* .process callback for privileged-register faults: log the violation
 * and queue adev->reset_work for recovery. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6399
/* .process callback for privileged-instruction faults: log the
 * violation and queue adev->reset_work for recovery. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6408
/* Common IP-block lifecycle callbacks for the GFX8 engine (init/fini,
 * suspend/resume, idle/reset handling, clock- and power-gating).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6428
/* Ring callbacks for the GFX (graphics) ring. emit_frame_size is the
 * worst-case dword budget of all per-submission packet emissions; the
 * per-emitter comments below account for each contribution.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6460
/* Ring callbacks for the compute (MEC) rings. Unlike the GFX ring
 * these use doorbell-based wptr updates and the RELEASE_MEM fence,
 * and have no switch-buffer/context-control emitters.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6488
6489 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6490 {
6491         int i;
6492
6493         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6494                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6495
6496         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6497                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6498 }
6499
/* Interrupt source for CP end-of-pipe (fence completion) events. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6504
/* Interrupt source for privileged-register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6509
/* Interrupt source for privileged/illegal instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6514
/* Register the GFX interrupt sources with the device. The EOP source
 * has one type per ring (AMDGPU_CP_IRQ_LAST); the fault sources have
 * a single type each.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6526
6527 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6528 {
6529         switch (adev->asic_type) {
6530         case CHIP_TOPAZ:
6531                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6532                 break;
6533         case CHIP_STONEY:
6534         case CHIP_CARRIZO:
6535                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6536                 break;
6537         default:
6538                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6539                 break;
6540         }
6541 }
6542
/* init asic gds info: read total GDS memory from the hardware and
 * split mem/GWS/OA resources between the GFX and CS partitions.
 * The split differs for 64 KiB-GDS parts vs everything else.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6570
6571 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6572                                                  u32 bitmap)
6573 {
6574         u32 data;
6575
6576         if (!bitmap)
6577                 return;
6578
6579         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6580         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6581
6582         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6583 }
6584
6585 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6586 {
6587         u32 data, mask;
6588
6589         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6590                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6591
6592         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6593
6594         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6595 }
6596
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "always on" CU mask (first 2 active CUs of
 * each SH). Takes grbm_idx_mutex because it steers GRBM register
 * access per SE/SH and must restore broadcast mode afterwards.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* parse user CU-disable requests for up to 4 SEs x 2 SHs */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* steer subsequent register access to SE i / SH j */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers 4x2; skip beyond that */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first 2 per SH join the AO mask */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6639
/* IP block descriptor for GFX v8.0 */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6648
/* IP block descriptor for GFX v8.1; shares the v8.0 callback table */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};