/*
 * drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * (from karo-tx-linux.git, commit subject: "drm/amdgpu/gfx8: rename some functions")
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Number of hardware rings exposed by the GFX v8 block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden (recommended reset) values for GB_ADDR_CONFIG, per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field-building helpers for the GB_TILE_MODE* and GB_MACROTILE_MODE*
 * registers: shift a raw value into the named bit-field position.
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature override bits in RLC_CGTT_MGCG_OVERRIDE (local definitions). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "direct register list" format header. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images requested by this IP block, per ASIC: CE/PFP/ME (gfx),
 * MEC (compute), and RLC microcode.  Note: Stoney and Topaz have no mec2
 * image listed.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * Per-VMID GDS register offsets: {base, size, GWS, OA} for each of the
 * 16 VMIDs.  Index into this table with the VMID number.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/*
 * Tonga golden-register tables.  Each entry is a {register, mask, value}
 * triplet consumed by gfx_v8_0_init_golden_registers() via
 * amdgpu_program_register_sequence() (see the switch at the bottom of
 * this file): the masked bits of the register are replaced with "value".
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common raster/addressing/SPI-reservation configuration for Tonga. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Tonga MGCG/CGCG (clock-gating) init sequence, including the per-CU
 * CGTS_CUn_* static-gating setup for CUs 0-7.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/*
 * Polaris11 golden-register settings ({register, mask, value} triplets;
 * presumably applied via amdgpu_program_register_sequence() like the
 * other ASICs' tables — verify in gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common addressing/SPI-reservation configuration for Polaris11. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Polaris10 golden-register settings ({register, mask, value} triplets). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common raster/addressing/SPI-reservation configuration for Polaris10. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/*
 * Fiji golden-register tables ({register, mask, value} triplets;
 * presumably applied via amdgpu_program_register_sequence() like the
 * Topaz path visible at the bottom of this chunk).
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji per-register golden settings. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/*
 * Fiji MGCG/CGCG (clock-gating) init sequence.  Unlike Tonga/Carrizo,
 * no per-CU CGTS_CUn_* entries are programmed here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/*
 * Iceland (Topaz) golden-register tables, applied by
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ via
 * amdgpu_program_register_sequence() ({register, mask, value} triplets).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Common raster/addressing/SPI-reservation configuration for Iceland. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Iceland MGCG/CGCG init sequence, including per-CU CGTS_CUn_* setup
 * for CUs 0-5 only (fewer CUs than Tonga/Carrizo).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/*
 * Carrizo (cz) golden-register tables ({register, mask, value} triplets;
 * presumably applied via amdgpu_program_register_sequence() like the
 * other ASIC paths).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Common raster/addressing/SPI-reservation configuration for Carrizo. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/*
 * Carrizo MGCG/CGCG init sequence, including per-CU CGTS_CUn_* setup
 * for CUs 0-7.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/* Stoney "golden" register settings: flat (register, mask, value) triples
 * applied at init time via amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden registers ((register, mask, value) triples):
 * raster/addressing config and SPI CU resource reservation.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain / coarse-grain clockgating init values
 * ((register, mask, value) triples).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
/* Forward declarations for static functions defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
664
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values.
 *
 * Selects the register sequences (clockgating init, golden settings, common
 * settings) matching the detected ASIC and writes them out through
 * amdgpu_program_register_sequence().  Unknown ASICs are left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* NOTE(review): board-specific quirk — two I2C register writes
		 * applied only on revision 0xc7 cards from three specific
		 * subsystem vendor/device SKUs; presumably a vendor-requested
		 * hardware workaround, confirm against board errata.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
753
754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
755 {
756         adev->gfx.scratch.num_reg = 7;
757         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
758         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
759 }
760
761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
762 {
763         struct amdgpu_device *adev = ring->adev;
764         uint32_t scratch;
765         uint32_t tmp = 0;
766         unsigned i;
767         int r;
768
769         r = amdgpu_gfx_scratch_get(adev, &scratch);
770         if (r) {
771                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
772                 return r;
773         }
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r) {
777                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
778                           ring->idx, r);
779                 amdgpu_gfx_scratch_free(adev, scratch);
780                 return r;
781         }
782         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
783         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
784         amdgpu_ring_write(ring, 0xDEADBEEF);
785         amdgpu_ring_commit(ring);
786
787         for (i = 0; i < adev->usec_timeout; i++) {
788                 tmp = RREG32(scratch);
789                 if (tmp == 0xDEADBEEF)
790                         break;
791                 DRM_UDELAY(1);
792         }
793         if (i < adev->usec_timeout) {
794                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
795                          ring->idx, i);
796         } else {
797                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
798                           ring->idx, scratch, tmp);
799                 r = -EINVAL;
800         }
801         amdgpu_gfx_scratch_free(adev, scratch);
802         return r;
803 }
804
/**
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring.
 *
 * Allocates a small IB containing a SET_UCONFIG_REG packet that writes
 * 0xDEADBEEF into a scratch register pre-seeded with 0xCAFEDEAD, schedules
 * it, waits on the resulting fence (bounded by @timeout), and verifies the
 * scratch register was updated.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, -EINVAL if
 * the scratch value is wrong, or a negative error from the helpers.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* single SET_UCONFIG_REG packet: write magic into the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	/* r == 0 means the fence timed out; r < 0 is a wait error */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	/* cleanup order: free the IB, drop the fence, then the scratch reg */
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
860
861
862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
863         release_firmware(adev->gfx.pfp_fw);
864         adev->gfx.pfp_fw = NULL;
865         release_firmware(adev->gfx.me_fw);
866         adev->gfx.me_fw = NULL;
867         release_firmware(adev->gfx.ce_fw);
868         adev->gfx.ce_fw = NULL;
869         release_firmware(adev->gfx.rlc_fw);
870         adev->gfx.rlc_fw = NULL;
871         release_firmware(adev->gfx.mec_fw);
872         adev->gfx.mec_fw = NULL;
873         if ((adev->asic_type != CHIP_STONEY) &&
874             (adev->asic_type != CHIP_TOPAZ))
875                 release_firmware(adev->gfx.mec2_fw);
876         adev->gfx.mec2_fw = NULL;
877
878         kfree(adev->gfx.rlc.register_list_format);
879 }
880
881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
882 {
883         const char *chip_name;
884         char fw_name[30];
885         int err;
886         struct amdgpu_firmware_info *info = NULL;
887         const struct common_firmware_header *header = NULL;
888         const struct gfx_firmware_header_v1_0 *cp_hdr;
889         const struct rlc_firmware_header_v2_0 *rlc_hdr;
890         unsigned int *tmp = NULL, i;
891
892         DRM_DEBUG("\n");
893
894         switch (adev->asic_type) {
895         case CHIP_TOPAZ:
896                 chip_name = "topaz";
897                 break;
898         case CHIP_TONGA:
899                 chip_name = "tonga";
900                 break;
901         case CHIP_CARRIZO:
902                 chip_name = "carrizo";
903                 break;
904         case CHIP_FIJI:
905                 chip_name = "fiji";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS10:
911                 chip_name = "polaris10";
912                 break;
913         case CHIP_POLARIS12:
914                 chip_name = "polaris12";
915                 break;
916         case CHIP_STONEY:
917                 chip_name = "stoney";
918                 break;
919         default:
920                 BUG();
921         }
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
924         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
931         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
935         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.me_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
942         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943
944         /* chain ib ucode isn't formal released, just disable it by far
945          * TODO: when ucod ready we should use ucode version to judge if
946          * chain-ib support or not.
947          */
948         adev->virt.chained_ib_support = false;
949
950         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
951
952         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
953         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
954         if (err)
955                 goto out;
956         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
957         if (err)
958                 goto out;
959         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
960         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
961         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
962
963         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
964         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
965         if (err)
966                 goto out;
967         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
968         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
969         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
970         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
971
972         adev->gfx.rlc.save_and_restore_offset =
973                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
974         adev->gfx.rlc.clear_state_descriptor_offset =
975                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
976         adev->gfx.rlc.avail_scratch_ram_locations =
977                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
978         adev->gfx.rlc.reg_restore_list_size =
979                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
980         adev->gfx.rlc.reg_list_format_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_start);
982         adev->gfx.rlc.reg_list_format_separate_start =
983                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
984         adev->gfx.rlc.starting_offsets_start =
985                         le32_to_cpu(rlc_hdr->starting_offsets_start);
986         adev->gfx.rlc.reg_list_format_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
988         adev->gfx.rlc.reg_list_size_bytes =
989                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
990
991         adev->gfx.rlc.register_list_format =
992                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
993                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
994
995         if (!adev->gfx.rlc.register_list_format) {
996                 err = -ENOMEM;
997                 goto out;
998         }
999
1000         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1001                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1002         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1003                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1004
1005         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1006
1007         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1008                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1009         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1010                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1011
1012         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1013         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1014         if (err)
1015                 goto out;
1016         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1017         if (err)
1018                 goto out;
1019         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1020         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023         if ((adev->asic_type != CHIP_STONEY) &&
1024             (adev->asic_type != CHIP_TOPAZ)) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1027                 if (!err) {
1028                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1029                         if (err)
1030                                 goto out;
1031                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1032                                 adev->gfx.mec2_fw->data;
1033                         adev->gfx.mec2_fw_version =
1034                                 le32_to_cpu(cp_hdr->header.ucode_version);
1035                         adev->gfx.mec2_feature_version =
1036                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1037                 } else {
1038                         err = 0;
1039                         adev->gfx.mec2_fw = NULL;
1040                 }
1041         }
1042
1043         if (adev->firmware.smu_load) {
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1045                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1046                 info->fw = adev->gfx.pfp_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1053                 info->fw = adev->gfx.me_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1059                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1060                 info->fw = adev->gfx.ce_fw;
1061                 header = (const struct common_firmware_header *)info->fw->data;
1062                 adev->firmware.fw_size +=
1063                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1064
1065                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1066                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1067                 info->fw = adev->gfx.rlc_fw;
1068                 header = (const struct common_firmware_header *)info->fw->data;
1069                 adev->firmware.fw_size +=
1070                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1071
1072                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1073                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1074                 info->fw = adev->gfx.mec_fw;
1075                 header = (const struct common_firmware_header *)info->fw->data;
1076                 adev->firmware.fw_size +=
1077                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1078
1079                 /* we need account JT in */
1080                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1081                 adev->firmware.fw_size +=
1082                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1083
1084                 if (amdgpu_sriov_vf(adev)) {
1085                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1086                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1087                         info->fw = adev->gfx.mec_fw;
1088                         adev->firmware.fw_size +=
1089                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1090                 }
1091
1092                 if (adev->gfx.mec2_fw) {
1093                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1094                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1095                         info->fw = adev->gfx.mec2_fw;
1096                         header = (const struct common_firmware_header *)info->fw->data;
1097                         adev->firmware.fw_size +=
1098                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1099                 }
1100
1101         }
1102
1103 out:
1104         if (err) {
1105                 dev_err(adev->dev,
1106                         "gfx8: Failed to load firmware \"%s\"\n",
1107                         fw_name);
1108                 release_firmware(adev->gfx.pfp_fw);
1109                 adev->gfx.pfp_fw = NULL;
1110                 release_firmware(adev->gfx.me_fw);
1111                 adev->gfx.me_fw = NULL;
1112                 release_firmware(adev->gfx.ce_fw);
1113                 adev->gfx.ce_fw = NULL;
1114                 release_firmware(adev->gfx.rlc_fw);
1115                 adev->gfx.rlc_fw = NULL;
1116                 release_firmware(adev->gfx.mec_fw);
1117                 adev->gfx.mec_fw = NULL;
1118                 release_firmware(adev->gfx.mec2_fw);
1119                 adev->gfx.mec2_fw = NULL;
1120         }
1121         return err;
1122 }
1123
/**
 * gfx_v8_0_get_csb_buffer - build the RLC clear state buffer.
 *
 * Emits PM4 packets into @buffer: preamble begin, context control, all
 * SECT_CONTEXT register extents from adev->gfx.rlc.cs_data, the raster
 * config pair, preamble end, and a final CLEAR_STATE packet.  Bails out
 * early (leaving the buffer partially written) if a non-context section
 * is encountered.  No-op if cs_data or buffer is NULL.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* dump every context-register extent as a SET_CONTEXT_REG packet */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config for SE0/SH0 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1170
/**
 * cz_init_cp_jump_table - copy CP jump tables into the RLC cp table buffer.
 *
 * Concatenates the jump table of each CP microengine firmware (me 0..3 =
 * CE, PFP, ME, MEC; me 4 = MEC2, Carrizo only) into the previously mapped
 * adev->gfx.rlc.cp_table_ptr, back to back.
 *
 * NOTE(review): if @me ever matched none of the branches, fw_data,
 * table_offset and table_size would be used uninitialized; this is only
 * safe because max_me (4 or 5) guarantees every iteration hits a branch.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo also has MEC2 firmware */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		/* next engine's table lands right after this one */
		bo_offset += table_size;
	}
}
1235
/**
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects.
 *
 * Unpins, unreserves and unrefs the clear state BO and the CP jump table
 * BO if they exist, NULLing the pointers afterwards.  A failed reserve is
 * only warned about; teardown proceeds regardless.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1262
1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1264 {
1265         volatile u32 *dst_ptr;
1266         u32 dws;
1267         const struct cs_section_def *cs_data;
1268         int r;
1269
1270         adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272         cs_data = adev->gfx.rlc.cs_data;
1273
1274         if (cs_data) {
1275                 /* clear state block */
1276                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1277
1278                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1279                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1280                                              AMDGPU_GEM_DOMAIN_VRAM,
1281                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1282                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1283                                              NULL, NULL,
1284                                              &adev->gfx.rlc.clear_state_obj);
1285                         if (r) {
1286                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1287                                 gfx_v8_0_rlc_fini(adev);
1288                                 return r;
1289                         }
1290                 }
1291                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1292                 if (unlikely(r != 0)) {
1293                         gfx_v8_0_rlc_fini(adev);
1294                         return r;
1295                 }
1296                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1297                                   &adev->gfx.rlc.clear_state_gpu_addr);
1298                 if (r) {
1299                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1300                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304
1305                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1306                 if (r) {
1307                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1308                         gfx_v8_0_rlc_fini(adev);
1309                         return r;
1310                 }
1311                 /* set up the cs buffer */
1312                 dst_ptr = adev->gfx.rlc.cs_ptr;
1313                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1314                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1315                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1316         }
1317
1318         if ((adev->asic_type == CHIP_CARRIZO) ||
1319             (adev->asic_type == CHIP_STONEY)) {
1320                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1321                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1322                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1323                                              AMDGPU_GEM_DOMAIN_VRAM,
1324                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1325                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1326                                              NULL, NULL,
1327                                              &adev->gfx.rlc.cp_table_obj);
1328                         if (r) {
1329                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1330                                 return r;
1331                         }
1332                 }
1333
1334                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1335                 if (unlikely(r != 0)) {
1336                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1340                                   &adev->gfx.rlc.cp_table_gpu_addr);
1341                 if (r) {
1342                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1343                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1344                         return r;
1345                 }
1346                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362 {
1363         int r;
1364
1365         if (adev->gfx.mec.hpd_eop_obj) {
1366                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1367                 if (unlikely(r != 0))
1368                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1369                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1370                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1371                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1372                 adev->gfx.mec.hpd_eop_obj = NULL;
1373         }
1374 }
1375
1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377                                   struct amdgpu_ring *ring,
1378                                   struct amdgpu_irq_src *irq)
1379 {
1380         int r = 0;
1381
1382         r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1383         if (r)
1384                 return r;
1385
1386         ring->adev = NULL;
1387         ring->ring_obj = NULL;
1388         ring->use_doorbell = true;
1389         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390         if (adev->gfx.mec2_fw) {
1391                 ring->me = 2;
1392                 ring->pipe = 0;
1393         } else {
1394                 ring->me = 1;
1395                 ring->pipe = 1;
1396         }
1397
1398         ring->queue = 0;
1399         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1400         r = amdgpu_ring_init(adev, ring, 1024,
1401                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1402         if (r)
1403                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1404
1405         return r;
1406 }
1407 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1408                                    struct amdgpu_irq_src *irq)
1409 {
1410         amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1411         amdgpu_ring_fini(ring);
1412 }
1413
1414 #define MEC_HPD_SIZE 2048
1415
1416 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1417 {
1418         int r;
1419         u32 *hpd;
1420
1421         /*
1422          * we assign only 1 pipe because all other pipes will
1423          * be handled by KFD
1424          */
1425         adev->gfx.mec.num_mec = 1;
1426         adev->gfx.mec.num_pipe = 1;
1427         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1428
1429         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1430                 r = amdgpu_bo_create(adev,
1431                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1432                                      PAGE_SIZE, true,
1433                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1434                                      &adev->gfx.mec.hpd_eop_obj);
1435                 if (r) {
1436                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1437                         return r;
1438                 }
1439         }
1440
1441         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1442         if (unlikely(r != 0)) {
1443                 gfx_v8_0_mec_fini(adev);
1444                 return r;
1445         }
1446         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1447                           &adev->gfx.mec.hpd_eop_gpu_addr);
1448         if (r) {
1449                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1450                 gfx_v8_0_mec_fini(adev);
1451                 return r;
1452         }
1453         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1454         if (r) {
1455                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1456                 gfx_v8_0_mec_fini(adev);
1457                 return r;
1458         }
1459
1460         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1461
1462         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1463         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1464
1465         return 0;
1466 }
1467
1468 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1469 {
1470         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1471
1472         amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1473 }
1474
1475 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1476 {
1477         int r;
1478         u32 *hpd;
1479         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1480
1481         r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1482                                     AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1483                                     &kiq->eop_gpu_addr, (void **)&hpd);
1484         if (r) {
1485                 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1486                 return r;
1487         }
1488
1489         memset(hpd, 0, MEC_HPD_SIZE);
1490
1491         amdgpu_bo_kunmap(kiq->eop_obj);
1492
1493         return 0;
1494 }
1495
/*
 * Raw GFX8 machine-code words for the compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to write known values into VGPRs.
 * Presumably a run of v_mov_b32 instructions terminated by
 * s_barrier/s_endpgm -- hand-assembled, not built from source here.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1532
/*
 * Raw GFX8 machine-code words for the compute shader dispatched (twice,
 * with different thread masks) by gfx_v8_0_do_edc_gpr_workarounds() to
 * write known values into SGPRs. Hand-assembled binary, not built from
 * source here.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1557
/*
 * Register/value pairs programmed via SET_SH_REG packets before the VGPR
 * init dispatch in gfx_v8_0_do_edc_gpr_workarounds(): thread-group shape,
 * resource limits, and user-data slots consumed by the shader.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1577
/*
 * Register/value pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(). Differs from sgpr2_init_regs only in
 * the CU mask (0x0f here vs 0xf0) so the two dispatches together cover
 * all CUs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1597
/*
 * Register/value pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); complement of sgpr1_init_regs
 * (CU mask 0xf0 vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1617
/*
 * EDC SEC/DED error counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each of these back once to clear the counters after enabling EDC.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1646
/*
 * gfx_v8_0_do_edc_gpr_workarounds - put GPRs into known state for EDC (CZ)
 *
 * Carrizo-only workaround: builds one indirect buffer containing three
 * compute dispatches (one VGPR-init shader, two SGPR-init shaders with
 * complementary CU masks), submits it on compute ring 0 and waits for the
 * fence, then enables EDC modes and reads the SEC/DED counter registers
 * back to clear them. Returns 0 on success (or when skipped) and a
 * negative error code on submit/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG reg/value pair, plus a
	 * 4-dword PGM_LO/HI packet, a 5-dword DISPATCH_DIRECT and a
	 * 2-dword EVENT_WRITE (see packet emission below)
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders into the tail of the same IB */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* enable double-error detect and fault propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the "| 1" sets bit 0 on top of clearing DIS_EDC --
	 * confirm this is the intended field and not a leftover
	 */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1809
1810 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1811 {
1812         u32 gb_addr_config;
1813         u32 mc_shared_chmap, mc_arb_ramcfg;
1814         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1815         u32 tmp;
1816         int ret;
1817
1818         switch (adev->asic_type) {
1819         case CHIP_TOPAZ:
1820                 adev->gfx.config.max_shader_engines = 1;
1821                 adev->gfx.config.max_tile_pipes = 2;
1822                 adev->gfx.config.max_cu_per_sh = 6;
1823                 adev->gfx.config.max_sh_per_se = 1;
1824                 adev->gfx.config.max_backends_per_se = 2;
1825                 adev->gfx.config.max_texture_channel_caches = 2;
1826                 adev->gfx.config.max_gprs = 256;
1827                 adev->gfx.config.max_gs_threads = 32;
1828                 adev->gfx.config.max_hw_contexts = 8;
1829
1830                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1835                 break;
1836         case CHIP_FIJI:
1837                 adev->gfx.config.max_shader_engines = 4;
1838                 adev->gfx.config.max_tile_pipes = 16;
1839                 adev->gfx.config.max_cu_per_sh = 16;
1840                 adev->gfx.config.max_sh_per_se = 1;
1841                 adev->gfx.config.max_backends_per_se = 4;
1842                 adev->gfx.config.max_texture_channel_caches = 16;
1843                 adev->gfx.config.max_gprs = 256;
1844                 adev->gfx.config.max_gs_threads = 32;
1845                 adev->gfx.config.max_hw_contexts = 8;
1846
1847                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852                 break;
1853         case CHIP_POLARIS11:
1854         case CHIP_POLARIS12:
1855                 ret = amdgpu_atombios_get_gfx_info(adev);
1856                 if (ret)
1857                         return ret;
1858                 adev->gfx.config.max_gprs = 256;
1859                 adev->gfx.config.max_gs_threads = 32;
1860                 adev->gfx.config.max_hw_contexts = 8;
1861
1862                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1867                 break;
1868         case CHIP_POLARIS10:
1869                 ret = amdgpu_atombios_get_gfx_info(adev);
1870                 if (ret)
1871                         return ret;
1872                 adev->gfx.config.max_gprs = 256;
1873                 adev->gfx.config.max_gs_threads = 32;
1874                 adev->gfx.config.max_hw_contexts = 8;
1875
1876                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1881                 break;
1882         case CHIP_TONGA:
1883                 adev->gfx.config.max_shader_engines = 4;
1884                 adev->gfx.config.max_tile_pipes = 8;
1885                 adev->gfx.config.max_cu_per_sh = 8;
1886                 adev->gfx.config.max_sh_per_se = 1;
1887                 adev->gfx.config.max_backends_per_se = 2;
1888                 adev->gfx.config.max_texture_channel_caches = 8;
1889                 adev->gfx.config.max_gprs = 256;
1890                 adev->gfx.config.max_gs_threads = 32;
1891                 adev->gfx.config.max_hw_contexts = 8;
1892
1893                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1894                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1895                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1896                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1897                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1898                 break;
1899         case CHIP_CARRIZO:
1900                 adev->gfx.config.max_shader_engines = 1;
1901                 adev->gfx.config.max_tile_pipes = 2;
1902                 adev->gfx.config.max_sh_per_se = 1;
1903                 adev->gfx.config.max_backends_per_se = 2;
1904
1905                 switch (adev->pdev->revision) {
1906                 case 0xc4:
1907                 case 0x84:
1908                 case 0xc8:
1909                 case 0xcc:
1910                 case 0xe1:
1911                 case 0xe3:
1912                         /* B10 */
1913                         adev->gfx.config.max_cu_per_sh = 8;
1914                         break;
1915                 case 0xc5:
1916                 case 0x81:
1917                 case 0x85:
1918                 case 0xc9:
1919                 case 0xcd:
1920                 case 0xe2:
1921                 case 0xe4:
1922                         /* B8 */
1923                         adev->gfx.config.max_cu_per_sh = 6;
1924                         break;
1925                 case 0xc6:
1926                 case 0xca:
1927                 case 0xce:
1928                 case 0x88:
1929                         /* B6 */
1930                         adev->gfx.config.max_cu_per_sh = 6;
1931                         break;
1932                 case 0xc7:
1933                 case 0x87:
1934                 case 0xcb:
1935                 case 0xe5:
1936                 case 0x89:
1937                 default:
1938                         /* B4 */
1939                         adev->gfx.config.max_cu_per_sh = 4;
1940                         break;
1941                 }
1942
1943                 adev->gfx.config.max_texture_channel_caches = 2;
1944                 adev->gfx.config.max_gprs = 256;
1945                 adev->gfx.config.max_gs_threads = 32;
1946                 adev->gfx.config.max_hw_contexts = 8;
1947
1948                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1949                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1950                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1951                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1952                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1953                 break;
1954         case CHIP_STONEY:
1955                 adev->gfx.config.max_shader_engines = 1;
1956                 adev->gfx.config.max_tile_pipes = 2;
1957                 adev->gfx.config.max_sh_per_se = 1;
1958                 adev->gfx.config.max_backends_per_se = 1;
1959
1960                 switch (adev->pdev->revision) {
1961                 case 0xc0:
1962                 case 0xc1:
1963                 case 0xc2:
1964                 case 0xc4:
1965                 case 0xc8:
1966                 case 0xc9:
1967                         adev->gfx.config.max_cu_per_sh = 3;
1968                         break;
1969                 case 0xd0:
1970                 case 0xd1:
1971                 case 0xd2:
1972                 default:
1973                         adev->gfx.config.max_cu_per_sh = 2;
1974                         break;
1975                 }
1976
1977                 adev->gfx.config.max_texture_channel_caches = 2;
1978                 adev->gfx.config.max_gprs = 256;
1979                 adev->gfx.config.max_gs_threads = 16;
1980                 adev->gfx.config.max_hw_contexts = 8;
1981
1982                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1983                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1984                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1985                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1986                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1987                 break;
1988         default:
1989                 adev->gfx.config.max_shader_engines = 2;
1990                 adev->gfx.config.max_tile_pipes = 4;
1991                 adev->gfx.config.max_cu_per_sh = 2;
1992                 adev->gfx.config.max_sh_per_se = 1;
1993                 adev->gfx.config.max_backends_per_se = 2;
1994                 adev->gfx.config.max_texture_channel_caches = 4;
1995                 adev->gfx.config.max_gprs = 256;
1996                 adev->gfx.config.max_gs_threads = 32;
1997                 adev->gfx.config.max_hw_contexts = 8;
1998
1999                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2000                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2001                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2002                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2003                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2004                 break;
2005         }
2006
2007         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2008         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2009         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2010
2011         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2012         adev->gfx.config.mem_max_burst_length_bytes = 256;
2013         if (adev->flags & AMD_IS_APU) {
2014                 /* Get memory bank mapping mode. */
2015                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2016                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2017                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2018
2019                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2020                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2021                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2022
2023                 /* Validate settings in case only one DIMM installed. */
2024                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2025                         dimm00_addr_map = 0;
2026                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2027                         dimm01_addr_map = 0;
2028                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2029                         dimm10_addr_map = 0;
2030                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2031                         dimm11_addr_map = 0;
2032
2033                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2034                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2035                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2036                         adev->gfx.config.mem_row_size_in_kb = 2;
2037                 else
2038                         adev->gfx.config.mem_row_size_in_kb = 1;
2039         } else {
2040                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2041                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2042                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2043                         adev->gfx.config.mem_row_size_in_kb = 4;
2044         }
2045
2046         adev->gfx.config.shader_engine_tile_size = 32;
2047         adev->gfx.config.num_gpus = 1;
2048         adev->gfx.config.multi_gpu_tile_size = 64;
2049
2050         /* fix up row size */
2051         switch (adev->gfx.config.mem_row_size_in_kb) {
2052         case 1:
2053         default:
2054                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2055                 break;
2056         case 2:
2057                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2058                 break;
2059         case 4:
2060                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2061                 break;
2062         }
2063         adev->gfx.config.gb_addr_config = gb_addr_config;
2064
2065         return 0;
2066 }
2067
/**
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 * @handle: opaque IP-block handle; actually a struct amdgpu_device pointer
 *
 * Registers the interrupt sources used by the graphics block, loads the
 * GFX microcode, allocates the RLC and MEC buffer objects, creates the
 * gfx and compute rings, sets up KIQ/MQD state when running as an SR-IOV
 * virtual function, reserves the GDS/GWS/OA partitions and finally runs
 * the early GPU config readback.
 *
 * Returns 0 on success or a negative error code from the first failing
 * step.  NOTE(review): on failure the function returns immediately without
 * unwinding the steps that already succeeded — presumably the framework
 * invokes sw_fini for cleanup in that case; confirm against the IP-block
 * error handling in the amdgpu core.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event (interrupt source id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event (interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Fetch the GFX/CP/RLC firmware images before touching any BOs. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring index i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* KIQ and per-queue MQDs are only needed when running as an SR-IOV VF. */
	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2206
2207 static int gfx_v8_0_sw_fini(void *handle)
2208 {
2209         int i;
2210         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2211
2212         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2213         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2214         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2215
2216         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2217                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2218         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2219                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2220
2221         if (amdgpu_sriov_vf(adev)) {
2222                 gfx_v8_0_compute_mqd_sw_fini(adev);
2223                 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2224                 gfx_v8_0_kiq_fini(adev);
2225         }
2226
2227         gfx_v8_0_mec_fini(adev);
2228         gfx_v8_0_rlc_fini(adev);
2229         gfx_v8_0_free_microcode(adev);
2230
2231         return 0;
2232 }
2233
2234 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2235 {
2236         uint32_t *modearray, *mod2array;
2237         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2238         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2239         u32 reg_offset;
2240
2241         modearray = adev->gfx.config.tile_mode_array;
2242         mod2array = adev->gfx.config.macrotile_mode_array;
2243
2244         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2245                 modearray[reg_offset] = 0;
2246
2247         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2248                 mod2array[reg_offset] = 0;
2249
2250         switch (adev->asic_type) {
2251         case CHIP_TOPAZ:
2252                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2) |
2254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257                                 PIPE_CONFIG(ADDR_SURF_P2) |
2258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261                                 PIPE_CONFIG(ADDR_SURF_P2) |
2262                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2263                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265                                 PIPE_CONFIG(ADDR_SURF_P2) |
2266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269                                 PIPE_CONFIG(ADDR_SURF_P2) |
2270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                 PIPE_CONFIG(ADDR_SURF_P2) |
2274                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2277                                 PIPE_CONFIG(ADDR_SURF_P2) |
2278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2281                                 PIPE_CONFIG(ADDR_SURF_P2));
2282                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2283                                 PIPE_CONFIG(ADDR_SURF_P2) |
2284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                  PIPE_CONFIG(ADDR_SURF_P2) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2291                                  PIPE_CONFIG(ADDR_SURF_P2) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2294                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295                                  PIPE_CONFIG(ADDR_SURF_P2) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                  PIPE_CONFIG(ADDR_SURF_P2) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2303                                  PIPE_CONFIG(ADDR_SURF_P2) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                  PIPE_CONFIG(ADDR_SURF_P2) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2311                                  PIPE_CONFIG(ADDR_SURF_P2) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2315                                  PIPE_CONFIG(ADDR_SURF_P2) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2319                                  PIPE_CONFIG(ADDR_SURF_P2) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2323                                  PIPE_CONFIG(ADDR_SURF_P2) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2327                                  PIPE_CONFIG(ADDR_SURF_P2) |
2328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2331                                  PIPE_CONFIG(ADDR_SURF_P2) |
2332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2335                                  PIPE_CONFIG(ADDR_SURF_P2) |
2336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2339                                  PIPE_CONFIG(ADDR_SURF_P2) |
2340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2343                                  PIPE_CONFIG(ADDR_SURF_P2) |
2344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2346                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347                                  PIPE_CONFIG(ADDR_SURF_P2) |
2348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351                                  PIPE_CONFIG(ADDR_SURF_P2) |
2352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2354
2355                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362                                 NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366                                 NUM_BANKS(ADDR_SURF_8_BANK));
2367                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370                                 NUM_BANKS(ADDR_SURF_8_BANK));
2371                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374                                 NUM_BANKS(ADDR_SURF_8_BANK));
2375                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378                                 NUM_BANKS(ADDR_SURF_8_BANK));
2379                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382                                 NUM_BANKS(ADDR_SURF_8_BANK));
2383                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386                                 NUM_BANKS(ADDR_SURF_16_BANK));
2387                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390                                 NUM_BANKS(ADDR_SURF_16_BANK));
2391                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2392                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2396                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398                                  NUM_BANKS(ADDR_SURF_16_BANK));
2399                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2402                                  NUM_BANKS(ADDR_SURF_16_BANK));
2403                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2406                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2410                                  NUM_BANKS(ADDR_SURF_8_BANK));
2411
2412                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2413                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2414                             reg_offset != 23)
2415                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2416
2417                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2418                         if (reg_offset != 7)
2419                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2420
2421                 break;
2422         case CHIP_FIJI:
2423                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2430                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2434                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2438                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2439                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2442                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2443                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2446                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2447                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2448                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2450                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2454                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2456                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2457                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2460                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2482                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2485                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2486                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2488                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2489                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2490                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2492                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2493                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2494                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2495                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2498                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2500                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2502                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2506                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2510                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2513                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2514                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2517                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2518                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2521                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2522                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2525                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2526                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2529                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545
2546                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2549                                 NUM_BANKS(ADDR_SURF_8_BANK));
2550                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553                                 NUM_BANKS(ADDR_SURF_8_BANK));
2554                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557                                 NUM_BANKS(ADDR_SURF_8_BANK));
2558                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561                                 NUM_BANKS(ADDR_SURF_8_BANK));
2562                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565                                 NUM_BANKS(ADDR_SURF_8_BANK));
2566                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569                                 NUM_BANKS(ADDR_SURF_8_BANK));
2570                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573                                 NUM_BANKS(ADDR_SURF_8_BANK));
2574                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2576                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2577                                 NUM_BANKS(ADDR_SURF_8_BANK));
2578                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2581                                 NUM_BANKS(ADDR_SURF_8_BANK));
2582                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2585                                  NUM_BANKS(ADDR_SURF_8_BANK));
2586                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2589                                  NUM_BANKS(ADDR_SURF_8_BANK));
2590                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2593                                  NUM_BANKS(ADDR_SURF_8_BANK));
2594                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2596                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2597                                  NUM_BANKS(ADDR_SURF_8_BANK));
2598                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2601                                  NUM_BANKS(ADDR_SURF_4_BANK));
2602
2603                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2604                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2605
2606                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2607                         if (reg_offset != 7)
2608                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2609
2610                 break;
2611         case CHIP_TONGA:
2612                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2628                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2632                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2637                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2649                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2653                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2663                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2669                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2671                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2673                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2677                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2681                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2683                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2684                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2687                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2689                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2691                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2695                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2699                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2701                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2702                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2703                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2705                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2707                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2711                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2712                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2715                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2716                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2720                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2724                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2728                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734
2735                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2738                                 NUM_BANKS(ADDR_SURF_16_BANK));
2739                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742                                 NUM_BANKS(ADDR_SURF_16_BANK));
2743                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                 NUM_BANKS(ADDR_SURF_16_BANK));
2747                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750                                 NUM_BANKS(ADDR_SURF_16_BANK));
2751                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754                                 NUM_BANKS(ADDR_SURF_16_BANK));
2755                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2758                                 NUM_BANKS(ADDR_SURF_16_BANK));
2759                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762                                 NUM_BANKS(ADDR_SURF_16_BANK));
2763                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2765                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766                                 NUM_BANKS(ADDR_SURF_16_BANK));
2767                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770                                 NUM_BANKS(ADDR_SURF_16_BANK));
2771                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774                                  NUM_BANKS(ADDR_SURF_16_BANK));
2775                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778                                  NUM_BANKS(ADDR_SURF_16_BANK));
2779                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782                                  NUM_BANKS(ADDR_SURF_8_BANK));
2783                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786                                  NUM_BANKS(ADDR_SURF_4_BANK));
2787                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2790                                  NUM_BANKS(ADDR_SURF_4_BANK));
2791
2792                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2793                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2794
2795                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2796                         if (reg_offset != 7)
2797                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2798
2799                 break;
2800         case CHIP_POLARIS11:
2801         case CHIP_POLARIS12:
2802                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2818                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2822                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2826                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2830                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2835                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2836                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2847                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2852                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2859                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2861                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2863                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2865                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2868                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2872                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2873                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2877                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2881                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2885                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2888                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2889                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2892                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2893                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2896                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2897                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2900                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2901                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2905                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2911                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2915                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2919                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2920                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2923                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2924
2925                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928                                 NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933                                 NUM_BANKS(ADDR_SURF_16_BANK));
2934
2935                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2937                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2938                                 NUM_BANKS(ADDR_SURF_16_BANK));
2939
2940                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2942                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943                                 NUM_BANKS(ADDR_SURF_16_BANK));
2944
2945                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2948                                 NUM_BANKS(ADDR_SURF_16_BANK));
2949
2950                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2952                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2953                                 NUM_BANKS(ADDR_SURF_16_BANK));
2954
2955                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2957                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2958                                 NUM_BANKS(ADDR_SURF_16_BANK));
2959
2960                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964
2965                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2966                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2967                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2968                                 NUM_BANKS(ADDR_SURF_16_BANK));
2969
2970                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2972                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2973                                 NUM_BANKS(ADDR_SURF_16_BANK));
2974
2975                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979
2980                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983                                 NUM_BANKS(ADDR_SURF_16_BANK));
2984
2985                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2988                                 NUM_BANKS(ADDR_SURF_8_BANK));
2989
2990                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2992                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2993                                 NUM_BANKS(ADDR_SURF_4_BANK));
2994
2995                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2996                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2997
2998                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2999                         if (reg_offset != 7)
3000                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3001
3002                 break;
3003         case CHIP_POLARIS10:
3004                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3005                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3008                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3012                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3013                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3016                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3018                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3020                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3021                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3024                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3025                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3026                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3028                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3029                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3030                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3032                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3033                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3034                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3036                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3037                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3038                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3039                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3040                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3041                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3042                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3043                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3045                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3047                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3048                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3049                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3050                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3051                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3052                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3053                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3054                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3055                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3057                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3058                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3059                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3060                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3061                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3062                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3063                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3064                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3065                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3066                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3067                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3069                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3070                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3071                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3073                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3074                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3075                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3078                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3079                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3081                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3082                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3083                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3085                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3087                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3091                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3093                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3094                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3095                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3097                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3099                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3101                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3103                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3107                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3113                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3117                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3121                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3122                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3125                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3126
3127                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3129                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3130                                 NUM_BANKS(ADDR_SURF_16_BANK));
3131
3132                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135                                 NUM_BANKS(ADDR_SURF_16_BANK));
3136
3137                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3139                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3140                                 NUM_BANKS(ADDR_SURF_16_BANK));
3141
3142                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3144                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145                                 NUM_BANKS(ADDR_SURF_16_BANK));
3146
3147                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                 NUM_BANKS(ADDR_SURF_16_BANK));
3151
3152                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3154                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3155                                 NUM_BANKS(ADDR_SURF_16_BANK));
3156
3157                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3158                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3159                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3160                                 NUM_BANKS(ADDR_SURF_16_BANK));
3161
3162                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3164                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3165                                 NUM_BANKS(ADDR_SURF_16_BANK));
3166
3167                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170                                 NUM_BANKS(ADDR_SURF_16_BANK));
3171
3172                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3174                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3175                                 NUM_BANKS(ADDR_SURF_16_BANK));
3176
3177                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3179                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3180                                 NUM_BANKS(ADDR_SURF_16_BANK));
3181
3182                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3184                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3185                                 NUM_BANKS(ADDR_SURF_8_BANK));
3186
3187                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3190                                 NUM_BANKS(ADDR_SURF_4_BANK));
3191
3192                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3195                                 NUM_BANKS(ADDR_SURF_4_BANK));
3196
3197                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3198                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3199
3200                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3201                         if (reg_offset != 7)
3202                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3203
3204                 break;
3205         case CHIP_STONEY:
3206                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207                                 PIPE_CONFIG(ADDR_SURF_P2) |
3208                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3210                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211                                 PIPE_CONFIG(ADDR_SURF_P2) |
3212                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3213                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3214                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215                                 PIPE_CONFIG(ADDR_SURF_P2) |
3216                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3217                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3218                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219                                 PIPE_CONFIG(ADDR_SURF_P2) |
3220                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3221                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3222                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223                                 PIPE_CONFIG(ADDR_SURF_P2) |
3224                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3225                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3226                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227                                 PIPE_CONFIG(ADDR_SURF_P2) |
3228                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3230                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3231                                 PIPE_CONFIG(ADDR_SURF_P2) |
3232                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3233                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3234                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3235                                 PIPE_CONFIG(ADDR_SURF_P2));
3236                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3237                                 PIPE_CONFIG(ADDR_SURF_P2) |
3238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3240                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241                                  PIPE_CONFIG(ADDR_SURF_P2) |
3242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3244                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3245                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3248                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3249                                  PIPE_CONFIG(ADDR_SURF_P2) |
3250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253                                  PIPE_CONFIG(ADDR_SURF_P2) |
3254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3257                                  PIPE_CONFIG(ADDR_SURF_P2) |
3258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                  PIPE_CONFIG(ADDR_SURF_P2) |
3262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3264                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3265                                  PIPE_CONFIG(ADDR_SURF_P2) |
3266                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3268                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3269                                  PIPE_CONFIG(ADDR_SURF_P2) |
3270                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3271                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3273                                  PIPE_CONFIG(ADDR_SURF_P2) |
3274                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3275                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3277                                  PIPE_CONFIG(ADDR_SURF_P2) |
3278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3280                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3281                                  PIPE_CONFIG(ADDR_SURF_P2) |
3282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3284                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3288                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3292                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308
3309                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3311                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312                                 NUM_BANKS(ADDR_SURF_8_BANK));
3313                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3315                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316                                 NUM_BANKS(ADDR_SURF_8_BANK));
3317                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3320                                 NUM_BANKS(ADDR_SURF_8_BANK));
3321                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3322                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3323                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3324                                 NUM_BANKS(ADDR_SURF_8_BANK));
3325                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3327                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3328                                 NUM_BANKS(ADDR_SURF_8_BANK));
3329                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3332                                 NUM_BANKS(ADDR_SURF_8_BANK));
3333                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3336                                 NUM_BANKS(ADDR_SURF_8_BANK));
3337                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3338                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3339                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3340                                 NUM_BANKS(ADDR_SURF_16_BANK));
3341                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3342                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3343                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3344                                 NUM_BANKS(ADDR_SURF_16_BANK));
3345                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3346                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                  NUM_BANKS(ADDR_SURF_16_BANK));
3349                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3350                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3351                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352                                  NUM_BANKS(ADDR_SURF_16_BANK));
3353                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3355                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356                                  NUM_BANKS(ADDR_SURF_16_BANK));
3357                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360                                  NUM_BANKS(ADDR_SURF_16_BANK));
3361                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                  NUM_BANKS(ADDR_SURF_8_BANK));
3365
3366                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3367                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3368                             reg_offset != 23)
3369                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3370
3371                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3372                         if (reg_offset != 7)
3373                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3374
3375                 break;
3376         default:
3377                 dev_warn(adev->dev,
3378                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3379                          adev->asic_type);
3380
3381         case CHIP_CARRIZO:
3382                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3383                                 PIPE_CONFIG(ADDR_SURF_P2) |
3384                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3386                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3387                                 PIPE_CONFIG(ADDR_SURF_P2) |
3388                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3390                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3391                                 PIPE_CONFIG(ADDR_SURF_P2) |
3392                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3394                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3395                                 PIPE_CONFIG(ADDR_SURF_P2) |
3396                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3398                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3399                                 PIPE_CONFIG(ADDR_SURF_P2) |
3400                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3402                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3403                                 PIPE_CONFIG(ADDR_SURF_P2) |
3404                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3406                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3407                                 PIPE_CONFIG(ADDR_SURF_P2) |
3408                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3409                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3410                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3411                                 PIPE_CONFIG(ADDR_SURF_P2));
3412                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3413                                 PIPE_CONFIG(ADDR_SURF_P2) |
3414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3415                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3416                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3417                                  PIPE_CONFIG(ADDR_SURF_P2) |
3418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3420                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3421                                  PIPE_CONFIG(ADDR_SURF_P2) |
3422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3424                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3425                                  PIPE_CONFIG(ADDR_SURF_P2) |
3426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3428                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3429                                  PIPE_CONFIG(ADDR_SURF_P2) |
3430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3432                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3433                                  PIPE_CONFIG(ADDR_SURF_P2) |
3434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3436                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3437                                  PIPE_CONFIG(ADDR_SURF_P2) |
3438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3440                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3441                                  PIPE_CONFIG(ADDR_SURF_P2) |
3442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3444                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3445                                  PIPE_CONFIG(ADDR_SURF_P2) |
3446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3449                                  PIPE_CONFIG(ADDR_SURF_P2) |
3450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3452                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3453                                  PIPE_CONFIG(ADDR_SURF_P2) |
3454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3456                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3457                                  PIPE_CONFIG(ADDR_SURF_P2) |
3458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3460                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3461                                  PIPE_CONFIG(ADDR_SURF_P2) |
3462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3464                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3465                                  PIPE_CONFIG(ADDR_SURF_P2) |
3466                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3467                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3468                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3469                                  PIPE_CONFIG(ADDR_SURF_P2) |
3470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3472                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3473                                  PIPE_CONFIG(ADDR_SURF_P2) |
3474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3476                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3477                                  PIPE_CONFIG(ADDR_SURF_P2) |
3478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3480                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3481                                  PIPE_CONFIG(ADDR_SURF_P2) |
3482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3484
3485                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3486                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3487                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3488                                 NUM_BANKS(ADDR_SURF_8_BANK));
3489                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3490                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3491                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3492                                 NUM_BANKS(ADDR_SURF_8_BANK));
3493                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3494                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3495                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3496                                 NUM_BANKS(ADDR_SURF_8_BANK));
3497                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3498                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3499                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3500                                 NUM_BANKS(ADDR_SURF_8_BANK));
3501                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3502                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3503                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3504                                 NUM_BANKS(ADDR_SURF_8_BANK));
3505                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3506                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3507                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3508                                 NUM_BANKS(ADDR_SURF_8_BANK));
3509                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3511                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3512                                 NUM_BANKS(ADDR_SURF_8_BANK));
3513                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3514                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3515                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3516                                 NUM_BANKS(ADDR_SURF_16_BANK));
3517                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3518                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3519                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3520                                 NUM_BANKS(ADDR_SURF_16_BANK));
3521                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3522                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3523                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3524                                  NUM_BANKS(ADDR_SURF_16_BANK));
3525                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3526                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3527                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3528                                  NUM_BANKS(ADDR_SURF_16_BANK));
3529                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3530                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3531                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3532                                  NUM_BANKS(ADDR_SURF_16_BANK));
3533                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3534                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3535                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3536                                  NUM_BANKS(ADDR_SURF_16_BANK));
3537                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3538                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3539                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3540                                  NUM_BANKS(ADDR_SURF_8_BANK));
3541
3542                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3543                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3544                             reg_offset != 23)
3545                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3546
3547                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3548                         if (reg_offset != 7)
3549                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3550
3551                 break;
3552         }
3553 }
3554
3555 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3556                                   u32 se_num, u32 sh_num, u32 instance)
3557 {
3558         u32 data;
3559
3560         if (instance == 0xffffffff)
3561                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3562         else
3563                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3564
3565         if (se_num == 0xffffffff)
3566                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3567         else
3568                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3569
3570         if (sh_num == 0xffffffff)
3571                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3572         else
3573                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3574
3575         WREG32(mmGRBM_GFX_INDEX, data);
3576 }
3577
3578 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3579 {
3580         return (u32)((1ULL << bit_width) - 1);
3581 }
3582
3583 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3584 {
3585         u32 data, mask;
3586
3587         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3588                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3589
3590         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3591
3592         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3593                                        adev->gfx.config.max_sh_per_se);
3594
3595         return (~data) & mask;
3596 }
3597
/*
 * gfx_v8_0_raster_config - per-ASIC raster configuration values
 *
 * @adev: amdgpu_device pointer
 * @rconf: receives the PA_SC_RASTER_CONFIG field bits (ORed in)
 * @rconf1: receives the PA_SC_RASTER_CONFIG_1 field bits (ORed in)
 *
 * ORs the hard-coded raster configuration for the current ASIC into
 * *rconf / *rconf1.  The visible caller (gfx_v8_0_setup_rb()) passes
 * zero-initialized values.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single-SE APU: defaults are fine */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3637
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config on
 * harvested parts
 *
 * @adev: amdgpu_device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of enabled render backends
 * @num_rb: total number of RBs before harvesting
 *
 * When some render backends are harvested (fused off), the generic
 * raster config would route packers/RBs to disabled units.  This walks
 * every shader engine and rewrites the SE/PKR/RB mapping fields so only
 * enabled backends are referenced, then programs the registers per SE.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* per-SE slice of the global RB mask */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one SE pair is fully harvested, point SE_PAIR_MAP at the
	 * surviving pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* within a pair, map SE to whichever engine still has RBs */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* same for the packer mapping inside this SE */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and for the RB mapping inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3746
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to build the active-RB bitmap, then either writes
 * the default raster config (no harvesting) or the harvested variant,
 * and finally caches the per-SE/SH register values for userspace
 * queries.  Caller-visible state: adev->gfx.config.backend_enable_mask,
 * num_rbs and rb_config[][] are filled in.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* grbm_idx_mutex serializes GRBM_GFX_INDEX manipulation */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* no RBs harvested (or none enabled at all): default config is fine */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3803
3804 /**
3805  * gfx_v8_0_init_compute_vmid - gart enable
3806  *
3807  * @rdev: amdgpu_device pointer
3808  *
3809  * Initialize compute vmid sh_mem registers
3810  *
3811  */
3812 #define DEFAULT_SH_MEM_BASES    (0x6000)
3813 #define FIRST_COMPUTE_VMID      (8)
3814 #define LAST_COMPUTE_VMID       (16)
3815 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3816 {
3817         int i;
3818         uint32_t sh_mem_config;
3819         uint32_t sh_mem_bases;
3820
3821         /*
3822          * Configure apertures:
3823          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3824          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3825          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3826          */
3827         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3828
3829         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3830                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3831                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3832                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3833                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3834                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3835
3836         mutex_lock(&adev->srbm_mutex);
3837         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3838                 vi_srbm_select(adev, 0, 0, 0, i);
3839                 /* CP and shaders */
3840                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3841                 WREG32(mmSH_MEM_APE1_BASE, 1);
3842                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3843                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3844         }
3845         vi_srbm_select(adev, 0, 0, 0, 0);
3846         mutex_unlock(&adev->srbm_mutex);
3847 }
3848
3849 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3850 {
3851         switch (adev->asic_type) {
3852         default:
3853                 adev->gfx.config.double_offchip_lds_buf = 1;
3854                 break;
3855         case CHIP_CARRIZO:
3856         case CHIP_STONEY:
3857                 adev->gfx.config.double_offchip_lds_buf = 0;
3858                 break;
3859         }
3860 }
3861
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the global address config, tiling tables, RB setup, per-VMID
 * SH_MEM apertures and various FIFO/arbitration registers.  Order
 * matters: tiling/RB/CU discovery happens before the SH_MEM programming.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/GART mappings): uncached default */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* user VMIDs: non-coherent default MTYPE */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* give all four pipe-order timestamps equal (high) priority */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3942
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes traffic to drain
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-SE/SH CU-master busy register and then the non-CU
 * master busy bits until idle or adev->usec_timeout expires.  Times out
 * silently (no error is reported to the caller).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3972
3973 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3974                                                bool enable)
3975 {
3976         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3977
3978         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3979         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3980         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3981         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3982
3983         WREG32(mmCP_INT_CNTL_RING0, tmp);
3984 }
3985
3986 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3987 {
3988         /* csib */
3989         WREG32(mmRLC_CSIB_ADDR_HI,
3990                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3991         WREG32(mmRLC_CSIB_ADDR_LO,
3992                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3993         WREG32(mmRLC_CSIB_LENGTH,
3994                         adev->gfx.rlc.clear_state_size);
3995 }
3996
3997 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3998                                 int ind_offset,
3999                                 int list_size,
4000                                 int *unique_indices,
4001                                 int *indices_count,
4002                                 int max_indices,
4003                                 int *ind_start_offsets,
4004                                 int *offset_count,
4005                                 int max_offset)
4006 {
4007         int indices;
4008         bool new_entry = true;
4009
4010         for (; ind_offset < list_size; ind_offset++) {
4011
4012                 if (new_entry) {
4013                         new_entry = false;
4014                         ind_start_offsets[*offset_count] = ind_offset;
4015                         *offset_count = *offset_count + 1;
4016                         BUG_ON(*offset_count >= max_offset);
4017                 }
4018
4019                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4020                         new_entry = true;
4021                         continue;
4022                 }
4023
4024                 ind_offset += 2;
4025
4026                 /* look for the matching indice */
4027                 for (indices = 0;
4028                         indices < *indices_count;
4029                         indices++) {
4030                         if (unique_indices[indices] ==
4031                                 register_list_format[ind_offset])
4032                                 break;
4033                 }
4034
4035                 if (indices >= *indices_count) {
4036                         unique_indices[*indices_count] =
4037                                 register_list_format[ind_offset];
4038                         indices = *indices_count;
4039                         *indices_count = *indices_count + 1;
4040                         BUG_ON(*indices_count >= max_indices);
4041                 }
4042
4043                 register_list_format[ind_offset] = indices;
4044         }
4045 }
4046
4047 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4048 {
4049         int i, temp, data;
4050         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4051         int indices_count = 0;
4052         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4053         int offset_count = 0;
4054
4055         int list_size;
4056         unsigned int *register_list_format =
4057                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4058         if (!register_list_format)
4059                 return -ENOMEM;
4060         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4061                         adev->gfx.rlc.reg_list_format_size_bytes);
4062
4063         gfx_v8_0_parse_ind_reg_list(register_list_format,
4064                                 RLC_FormatDirectRegListLength,
4065                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4066                                 unique_indices,
4067                                 &indices_count,
4068                                 sizeof(unique_indices) / sizeof(int),
4069                                 indirect_start_offsets,
4070                                 &offset_count,
4071                                 sizeof(indirect_start_offsets)/sizeof(int));
4072
4073         /* save and restore list */
4074         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4075
4076         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4077         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4078                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4079
4080         /* indirect list */
4081         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4082         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4083                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4084
4085         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4086         list_size = list_size >> 1;
4087         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4088         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4089
4090         /* starting offsets starts */
4091         WREG32(mmRLC_GPM_SCRATCH_ADDR,
4092                 adev->gfx.rlc.starting_offsets_start);
4093         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4094                 WREG32(mmRLC_GPM_SCRATCH_DATA,
4095                                 indirect_start_offsets[i]);
4096
4097         /* unique indices */
4098         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4099         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4100         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4101                 if (unique_indices[i] != 0) {
4102                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4103                         WREG32(data + i, unique_indices[i] >> 20);
4104                 }
4105         }
4106         kfree(register_list_format);
4107
4108         return 0;
4109 }
4110
4111 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4112 {
4113         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4114 }
4115
/* Program the power-gating timing parameters: WPTR poll idle count,
 * the four RLC power-gating delays, the SERDES command delay and the
 * GRBM register-save idle threshold.  The 0x60/0x10/0x3/0x55f0 values
 * are hardware tuning constants — NOTE(review): presumably from AMD's
 * VI PG programming guide; confirm before changing.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* all four PG delays get the same 0x10 value */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4132
4133 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4134                                                 bool enable)
4135 {
4136         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4137 }
4138
4139 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4140                                                   bool enable)
4141 {
4142         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4143 }
4144
4145 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4146 {
4147         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4148 }
4149
4150 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4151 {
4152         if ((adev->asic_type == CHIP_CARRIZO) ||
4153             (adev->asic_type == CHIP_STONEY)) {
4154                 gfx_v8_0_init_csb(adev);
4155                 gfx_v8_0_init_save_restore_list(adev);
4156                 gfx_v8_0_enable_save_restore_machine(adev);
4157                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4158                 gfx_v8_0_init_power_gating(adev);
4159                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4160         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4161                    (adev->asic_type == CHIP_POLARIS12)) {
4162                 gfx_v8_0_init_csb(adev);
4163                 gfx_v8_0_init_save_restore_list(adev);
4164                 gfx_v8_0_enable_save_restore_machine(adev);
4165                 gfx_v8_0_init_power_gating(adev);
4166         }
4167
4168 }
4169
/* Halt the RLC: stop its F32 core, mask the GUI idle interrupts, then
 * wait for the serdes transactions to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4177
/* Pulse the RLC soft-reset bit, holding each edge for 50us so the
 * reset can propagate.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4186
/* Start the RLC F32 core and (for dGPUs) re-enable the GUI idle
 * interrupts.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC time to come up */
	udelay(50);
}
4197
4198 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4199 {
4200         const struct rlc_firmware_header_v2_0 *hdr;
4201         const __le32 *fw_data;
4202         unsigned i, fw_size;
4203
4204         if (!adev->gfx.rlc_fw)
4205                 return -EINVAL;
4206
4207         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4208         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4209
4210         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4211                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4212         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4213
4214         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4215         for (i = 0; i < fw_size; i++)
4216                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4217         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4218
4219         return 0;
4220 }
4221
/* Full RLC bring-up sequence: stop the RLC, disable clock and power
 * gating while it is down, soft-reset it, re-init power gating, load
 * (or wait for) the firmware, then start it.  Order matters.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also clears the two low enable bits in the 3D
		 * CGCG/CGLS control register
		 */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just wait for it */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4266
4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4268 {
4269         int i;
4270         u32 tmp = RREG32(mmCP_ME_CNTL);
4271
4272         if (enable) {
4273                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4274                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4275                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4276         } else {
4277                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4278                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4279                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281                         adev->gfx.gfx_ring[i].ready = false;
4282         }
4283         WREG32(mmCP_ME_CNTL, tmp);
4284         udelay(50);
4285 }
4286
4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4288 {
4289         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290         const struct gfx_firmware_header_v1_0 *ce_hdr;
4291         const struct gfx_firmware_header_v1_0 *me_hdr;
4292         const __le32 *fw_data;
4293         unsigned i, fw_size;
4294
4295         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296                 return -EINVAL;
4297
4298         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299                 adev->gfx.pfp_fw->data;
4300         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301                 adev->gfx.ce_fw->data;
4302         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303                 adev->gfx.me_fw->data;
4304
4305         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4308
4309         gfx_v8_0_cp_gfx_enable(adev, false);
4310
4311         /* PFP */
4312         fw_data = (const __le32 *)
4313                 (adev->gfx.pfp_fw->data +
4314                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317         for (i = 0; i < fw_size; i++)
4318                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4320
4321         /* CE */
4322         fw_data = (const __le32 *)
4323                 (adev->gfx.ce_fw->data +
4324                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326         WREG32(mmCP_CE_UCODE_ADDR, 0);
4327         for (i = 0; i < fw_size; i++)
4328                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4330
4331         /* ME */
4332         fw_data = (const __le32 *)
4333                 (adev->gfx.me_fw->data +
4334                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336         WREG32(mmCP_ME_RAM_WADDR, 0);
4337         for (i = 0; i < fw_size; i++)
4338                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4340
4341         return 0;
4342 }
4343
4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4345 {
4346         u32 count = 0;
4347         const struct cs_section_def *sect = NULL;
4348         const struct cs_extent_def *ext = NULL;
4349
4350         /* begin clear state */
4351         count += 2;
4352         /* context control state */
4353         count += 3;
4354
4355         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4356                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4357                         if (sect->id == SECT_CONTEXT)
4358                                 count += 2 + ext->reg_count;
4359                         else
4360                                 return 0;
4361                 }
4362         }
4363         /* pa_sc_raster_config/pa_sc_raster_config1 */
4364         count += 4;
4365         /* end clear state */
4366         count += 2;
4367         /* clear state */
4368         count += 2;
4369
4370         return count;
4371 }
4372
/* Initialize the gfx CP and submit the clear-state PM4 stream built
 * from vi_cs_data, the per-ASIC raster configuration and the CE
 * partition setup.  Returns the ring-lock error, if any.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the SET_BASE packet below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz raster config depends on the RB count */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4465
/* Program the gfx ring buffer registers (size, pointers, writeback and
 * doorbell configuration), then start the ring and run a ring test.
 * The register ordering follows the RB bring-up sequence; returns the
 * ring-test result.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer reset settle before clearing RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally bounds the doorbell aperture */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4550
4551 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4552 {
4553         int i;
4554
4555         if (enable) {
4556                 WREG32(mmCP_MEC_CNTL, 0);
4557         } else {
4558                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4559                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4560                         adev->gfx.compute_ring[i].ready = false;
4561         }
4562         udelay(50);
4563 }
4564
4565 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4566 {
4567         const struct gfx_firmware_header_v1_0 *mec_hdr;
4568         const __le32 *fw_data;
4569         unsigned i, fw_size;
4570
4571         if (!adev->gfx.mec_fw)
4572                 return -EINVAL;
4573
4574         gfx_v8_0_cp_compute_enable(adev, false);
4575
4576         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4577         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4578
4579         fw_data = (const __le32 *)
4580                 (adev->gfx.mec_fw->data +
4581                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4582         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4583
4584         /* MEC1 */
4585         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4586         for (i = 0; i < fw_size; i++)
4587                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4588         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4589
4590         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4591         if (adev->gfx.mec2_fw) {
4592                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4593
4594                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4595                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4596
4597                 fw_data = (const __le32 *)
4598                         (adev->gfx.mec2_fw->data +
4599                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4600                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4601
4602                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4603                 for (i = 0; i < fw_size; i++)
4604                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4605                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4606         }
4607
4608         return 0;
4609 }
4610
4611 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4612 {
4613         int i, r;
4614
4615         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4616                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4617
4618                 if (ring->mqd_obj) {
4619                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4620                         if (unlikely(r != 0))
4621                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4622
4623                         amdgpu_bo_unpin(ring->mqd_obj);
4624                         amdgpu_bo_unreserve(ring->mqd_obj);
4625
4626                         amdgpu_bo_unref(&ring->mqd_obj);
4627                         ring->mqd_obj = NULL;
4628                         ring->mqd_ptr = NULL;
4629                         ring->mqd_gpu_addr = 0;
4630                 }
4631         }
4632 }
4633
4634 /* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write additionally sets bit 0x80 after the queue id has
	 * been latched — NOTE(review): the two-step write looks
	 * intentional (id first, then activate); confirm against the RLC
	 * programming docs before collapsing into one write.
	 */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4648
4649 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4650 {
4651         amdgpu_ring_alloc(ring, 8);
4652         /* set resources */
4653         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4654         amdgpu_ring_write(ring, 0);     /* vmid_mask:0 queue_type:0 (KIQ) */
4655         amdgpu_ring_write(ring, 0x000000FF);    /* queue mask lo */
4656         amdgpu_ring_write(ring, 0);     /* queue mask hi */
4657         amdgpu_ring_write(ring, 0);     /* gws mask lo */
4658         amdgpu_ring_write(ring, 0);     /* gws mask hi */
4659         amdgpu_ring_write(ring, 0);     /* oac mask */
4660         amdgpu_ring_write(ring, 0);     /* gds heap base:0, gds heap size:0 */
4661         amdgpu_ring_commit(ring);
4662         udelay(50);
4663 }
4664
4665 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4666                                    struct amdgpu_ring *ring)
4667 {
4668         struct amdgpu_device *adev = kiq_ring->adev;
4669         uint64_t mqd_addr, wptr_addr;
4670
4671         mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4672         wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4673         amdgpu_ring_alloc(kiq_ring, 8);
4674
4675         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4676         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4677         amdgpu_ring_write(kiq_ring, 0x21010000);
4678         amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4679                         (ring->queue << 26) |
4680                         (ring->pipe << 29) |
4681                         ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4682         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4683         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4684         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4685         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4686         amdgpu_ring_commit(kiq_ring);
4687         udelay(50);
4688 }
4689
4690 static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
4691                              struct vi_mqd *mqd,
4692                              uint64_t mqd_gpu_addr,
4693                              uint64_t eop_gpu_addr,
4694                              struct amdgpu_ring *ring)
4695 {
4696         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4697         uint32_t tmp;
4698
4699         mqd->header = 0xC0310800;
4700         mqd->compute_pipelinestat_enable = 0x00000001;
4701         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4702         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4703         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4704         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4705         mqd->compute_misc_reserved = 0x00000003;
4706
4707         eop_base_addr = eop_gpu_addr >> 8;
4708         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4709         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4710
4711         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4712         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4713         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4714                         (order_base_2(MEC_HPD_SIZE / 4) - 1));
4715
4716         mqd->cp_hqd_eop_control = tmp;
4717
4718         /* enable doorbell? */
4719         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4720
4721         if (ring->use_doorbell)
4722                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4723                                          DOORBELL_EN, 1);
4724         else
4725                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4726                                          DOORBELL_EN, 0);
4727
4728         mqd->cp_hqd_pq_doorbell_control = tmp;
4729
4730         /* disable the queue if it's active */
4731         mqd->cp_hqd_dequeue_request = 0;
4732         mqd->cp_hqd_pq_rptr = 0;
4733         mqd->cp_hqd_pq_wptr = 0;
4734
4735         /* set the pointer to the MQD */
4736         mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4737         mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4738
4739         /* set MQD vmid to 0 */
4740         tmp = RREG32(mmCP_MQD_CONTROL);
4741         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4742         mqd->cp_mqd_control = tmp;
4743
4744         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4745         hqd_gpu_addr = ring->gpu_addr >> 8;
4746         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4747         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4748
4749         /* set up the HQD, this is similar to CP_RB0_CNTL */
4750         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4751         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4752                             (order_base_2(ring->ring_size / 4) - 1));
4753         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4754                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4755 #ifdef __BIG_ENDIAN
4756         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4757 #endif
4758         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4759         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4760         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4761         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4762         mqd->cp_hqd_pq_control = tmp;
4763
4764         /* set the wb address whether it's enabled or not */
4765         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4766         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4767         mqd->cp_hqd_pq_rptr_report_addr_hi =
4768                 upper_32_bits(wb_gpu_addr) & 0xffff;
4769
4770         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4771         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4772         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4773         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4774
4775         tmp = 0;
4776         /* enable the doorbell if requested */
4777         if (ring->use_doorbell) {
4778                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4779                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4780                                 DOORBELL_OFFSET, ring->doorbell_index);
4781
4782                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4783                                          DOORBELL_EN, 1);
4784                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4785                                          DOORBELL_SOURCE, 0);
4786                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4787                                          DOORBELL_HIT, 0);
4788         }
4789
4790         mqd->cp_hqd_pq_doorbell_control = tmp;
4791
4792         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4793         ring->wptr = 0;
4794         mqd->cp_hqd_pq_wptr = ring->wptr;
4795         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4796
4797         /* set the vmid for the queue */
4798         mqd->cp_hqd_vmid = 0;
4799
4800         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4801         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4802         mqd->cp_hqd_persistent_state = tmp;
4803
4804         /* activate the queue */
4805         mqd->cp_hqd_active = 1;
4806
4807         return 0;
4808 }
4809
/*
 * gfx_v8_0_kiq_init_register() - write a prepared MQD image into the HQD
 *
 * @adev: amdgpu device
 * @mqd:  MQD whose fields were filled in beforehand (see gfx_v8_0_mqd_init())
 * @ring: ring backing the queue being programmed
 *
 * Copies the software MQD into the hardware queue descriptor registers of the
 * currently SRBM-selected queue: EOP base/control, doorbell control, MQD base,
 * PQ base/control, rptr/wptr report and poll addresses, then activates the
 * queue.  Caller must hold adev->srbm_mutex with the target me/pipe/queue
 * selected via vi_srbm_select() (see gfx_v8_0_kiq_init_queue()).
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
				      struct vi_mqd *mqd,
				      struct amdgpu_ring *ring)
{
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active: issue a dequeue request, then poll
	 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested; on these ASICs also program the
	 * MEC doorbell aperture (KIQ .. MEC_RING7) */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
						AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
						AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
4900
4901 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4902                                    struct vi_mqd *mqd,
4903                                    u64 mqd_gpu_addr)
4904 {
4905         struct amdgpu_device *adev = ring->adev;
4906         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4907         uint64_t eop_gpu_addr;
4908         bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
4909         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4910
4911         if (is_kiq) {
4912                 eop_gpu_addr = kiq->eop_gpu_addr;
4913                 gfx_v8_0_kiq_setting(&kiq->ring);
4914         } else {
4915                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4916                                         ring->queue * MEC_HPD_SIZE;
4917                 mqd_idx = ring - &adev->gfx.compute_ring[0];
4918         }
4919
4920         if (!adev->gfx.in_reset) {
4921                 memset((void *)mqd, 0, sizeof(*mqd));
4922                 mutex_lock(&adev->srbm_mutex);
4923                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4924                 gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4925                 if (is_kiq)
4926                         gfx_v8_0_kiq_init_register(adev, mqd, ring);
4927                 vi_srbm_select(adev, 0, 0, 0, 0);
4928                 mutex_unlock(&adev->srbm_mutex);
4929
4930                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4931                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4932         } else { /* for GPU_RESET case */
4933                 /* reset MQD to a clean status */
4934                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4935                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4936
4937                 /* reset ring buffer */
4938                 ring->wptr = 0;
4939                 amdgpu_ring_clear_ring(ring);
4940
4941                 if (is_kiq) {
4942                     mutex_lock(&adev->srbm_mutex);
4943                     vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4944                     gfx_v8_0_kiq_init_register(adev, mqd, ring);
4945                     vi_srbm_select(adev, 0, 0, 0, 0);
4946                     mutex_unlock(&adev->srbm_mutex);
4947                 }
4948         }
4949
4950         if (is_kiq)
4951                 gfx_v8_0_kiq_enable(ring);
4952         else
4953                 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4954
4955         return 0;
4956 }
4957
4958 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4959 {
4960         struct amdgpu_ring *ring = NULL;
4961         int r = 0, i;
4962
4963         gfx_v8_0_cp_compute_enable(adev, true);
4964
4965         ring = &adev->gfx.kiq.ring;
4966         if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
4967                 r = gfx_v8_0_kiq_init_queue(ring,
4968                                             (struct vi_mqd *)ring->mqd_ptr,
4969                                             ring->mqd_gpu_addr);
4970                 amdgpu_bo_kunmap(ring->mqd_obj);
4971                 ring->mqd_ptr = NULL;
4972                 if (r)
4973                         return r;
4974         } else {
4975                 return r;
4976         }
4977
4978         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4979                 ring = &adev->gfx.compute_ring[i];
4980                 if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
4981                         r = gfx_v8_0_kiq_init_queue(ring,
4982                                                     (struct vi_mqd *)ring->mqd_ptr,
4983                                                     ring->mqd_gpu_addr);
4984                         amdgpu_bo_kunmap(ring->mqd_obj);
4985                         ring->mqd_ptr = NULL;
4986                         if (r)
4987                         return r;
4988                 } else {
4989                         return r;
4990                 }
4991         }
4992
4993         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4994                 ring = &adev->gfx.compute_ring[i];
4995
4996                 ring->ready = true;
4997                 r = amdgpu_ring_test_ring(ring);
4998                 if (r)
4999                         ring->ready = false;
5000         }
5001
5002         ring = &adev->gfx.kiq.ring;
5003         ring->ready = true;
5004         r = amdgpu_ring_test_ring(ring);
5005         if (r)
5006                 ring->ready = false;
5007
5008         return 0;
5009 }
5010
/*
 * gfx_v8_0_cp_compute_resume() - allocate, program and start the compute queues
 *
 * @adev: amdgpu device
 *
 * For every compute ring: lazily creates and pins its MQD BO in GTT, then
 * under srbm_mutex (with the ring's me/pipe/queue selected) fills the MQD
 * and mirrors each field into the corresponding CP_HQD_* register, ending
 * with queue activation.  Afterwards enables the doorbell aperture, turns on
 * the compute CP and ring-tests each ring, marking it not-ready on failure.
 *
 * Returns 0 on success or a negative error code from BO create/reserve/
 * pin/map (partial state is torn down via gfx_v8_0_cp_compute_fini()).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SEs/threads for static thread management */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all register writes below target this ring's me/pipe/queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: dequeue request then
		 * poll CP_HQD_ACTIVE for up to adev->usec_timeout us */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested; on these ASICs also
		 * program the MEC doorbell aperture (KIQ .. MEC_RING7) */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		/* NOTE(review): GENERIC2 interrupt enable on pipe3 for these
		 * ASICs — presumably needed by the HWS/KFD path; confirm */
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5260
5261 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5262 {
5263         int r;
5264
5265         if (!(adev->flags & AMD_IS_APU))
5266                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5267
5268         if (!adev->pp_enabled) {
5269                 if (!adev->firmware.smu_load) {
5270                         /* legacy firmware loading */
5271                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5272                         if (r)
5273                                 return r;
5274
5275                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5276                         if (r)
5277                                 return r;
5278                 } else {
5279                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5280                                                         AMDGPU_UCODE_ID_CP_CE);
5281                         if (r)
5282                                 return -EINVAL;
5283
5284                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5285                                                         AMDGPU_UCODE_ID_CP_PFP);
5286                         if (r)
5287                                 return -EINVAL;
5288
5289                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5290                                                         AMDGPU_UCODE_ID_CP_ME);
5291                         if (r)
5292                                 return -EINVAL;
5293
5294                         if (adev->asic_type == CHIP_TOPAZ) {
5295                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5296                                 if (r)
5297                                         return r;
5298                         } else {
5299                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5300                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5301                                 if (r)
5302                                         return -EINVAL;
5303                         }
5304                 }
5305         }
5306
5307         r = gfx_v8_0_cp_gfx_resume(adev);
5308         if (r)
5309                 return r;
5310
5311         if (amdgpu_sriov_vf(adev))
5312                 r = gfx_v8_0_kiq_resume(adev);
5313         else
5314                 r = gfx_v8_0_cp_compute_resume(adev);
5315         if (r)
5316                 return r;
5317
5318         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5319
5320         return 0;
5321 }
5322
/*
 * gfx_v8_0_cp_enable() - enable or disable both CP halves
 *
 * @adev:   amdgpu device
 * @enable: true to enable, false to disable
 *
 * Convenience wrapper that toggles the gfx and compute command processors
 * together.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5328
/*
 * gfx_v8_0_hw_init() - IP-block hw_init callback for GFX v8
 *
 * @handle: opaque pointer to the amdgpu device
 *
 * Applies golden register settings, performs base GPU init, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is brought up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5345
/*
 * gfx_v8_0_hw_fini() - IP-block hw_fini callback for GFX v8
 *
 * @handle: opaque pointer to the amdgpu device
 *
 * Drops the privileged register/instruction interrupts, then (bare-metal
 * only) disables the CP, stops the RLC, frees compute MQD state and leaves
 * GFX power gating ungated.  SRIOV VFs skip the hardware teardown — the
 * host owns it.
 *
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave power gating ungated so a later hw_init starts cleanly */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5365
/*
 * gfx_v8_0_suspend() - IP-block suspend callback; suspend is a full hw_fini.
 *
 * @handle: opaque pointer to the amdgpu device
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	/* hw_fini takes the same opaque handle, so pass it straight through */
	return gfx_v8_0_hw_fini(handle);
}
5372
/*
 * gfx_v8_0_resume() - IP-block resume callback; resume is a full hw_init.
 *
 * @handle: opaque pointer to the amdgpu device
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	/* hw_init takes the same opaque handle, so pass it straight through */
	return gfx_v8_0_hw_init(handle);
}
5379
5380 static bool gfx_v8_0_is_idle(void *handle)
5381 {
5382         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5383
5384         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5385                 return false;
5386         else
5387                 return true;
5388 }
5389
5390 static int gfx_v8_0_wait_for_idle(void *handle)
5391 {
5392         unsigned i;
5393         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5394
5395         for (i = 0; i < adev->usec_timeout; i++) {
5396                 if (gfx_v8_0_is_idle(handle))
5397                         return 0;
5398
5399                 udelay(1);
5400         }
5401         return -ETIMEDOUT;
5402 }
5403
/*
 * gfx_v8_0_check_soft_reset() - decide whether GFX needs a soft reset
 *
 * @handle: opaque pointer to the amdgpu device
 *
 * Inspects the busy bits in GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and
 * accumulates the corresponding GRBM/SRBM soft-reset request bits.  The
 * resulting masks are cached in adev->gfx.grbm_soft_reset and
 * adev->gfx.srbm_soft_reset for the later pre/post soft-reset callbacks.
 *
 * Returns true when any soft reset is required, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx pipeline unit -> reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP fetcher/compute/gfx micro-engine busy -> reset them all */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5465
5466 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5467                                   struct amdgpu_ring *ring)
5468 {
5469         int i;
5470
5471         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5472         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5473                 u32 tmp;
5474                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5475                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5476                                     DEQUEUE_REQ, 2);
5477                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5478                 for (i = 0; i < adev->usec_timeout; i++) {
5479                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5480                                 break;
5481                         udelay(1);
5482                 }
5483         }
5484 }
5485
5486 static int gfx_v8_0_pre_soft_reset(void *handle)
5487 {
5488         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5489         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5490
5491         if ((!adev->gfx.grbm_soft_reset) &&
5492             (!adev->gfx.srbm_soft_reset))
5493                 return 0;
5494
5495         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5496         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5497
5498         /* stop the rlc */
5499         gfx_v8_0_rlc_stop(adev);
5500
5501         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5502             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5503                 /* Disable GFX parsing/prefetching */
5504                 gfx_v8_0_cp_gfx_enable(adev, false);
5505
5506         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5507             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5508             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5509             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5510                 int i;
5511
5512                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5513                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5514
5515                         gfx_v8_0_inactive_hqd(adev, ring);
5516                 }
5517                 /* Disable MEC parsing/prefetching */
5518                 gfx_v8_0_cp_compute_enable(adev, false);
5519         }
5520
5521        return 0;
5522 }
5523
/* gfx_v8_0_soft_reset - apply the pending GRBM/SRBM soft resets.
 *
 * Pulses the reset bits computed earlier by the check_soft_reset hook:
 * stalls/clears the GFX path via GMCON_DEBUG, asserts then deasserts
 * each reset mask with a ~50us hold time, releases the GMCON stall,
 * and settles.  The RREG32 after each WREG32 of a soft-reset register
 * is presumably a posting read to flush the write -- confirm against
 * the register programming guide.
 *
 * Returns 0 (also when no reset is pending).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* nothing to do if check_soft_reset() latched no hung bits */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (grbm_soft_reset || srbm_soft_reset) {
                /* stall and clear the GFX pipe while the resets pulse */
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert the GRBM reset bits, hold, then deassert */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same assert/hold/deassert dance for the SRBM bits */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        if (grbm_soft_reset || srbm_soft_reset) {
                /* release the GMCON stall/clear set above */
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5585
/* Reset @ring's hardware queue descriptor to a clean state after a
 * soft reset: select its SRBM bank, cancel any pending dequeue request
 * and zero the queue read/write pointers, then restore the default
 * bank. */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
                              struct amdgpu_ring *ring)
{
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
        WREG32(mmCP_HQD_PQ_RPTR, 0);
        WREG32(mmCP_HQD_PQ_WPTR, 0);
        vi_srbm_select(adev, 0, 0, 0, 0);
}
5595
5596 static int gfx_v8_0_post_soft_reset(void *handle)
5597 {
5598         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5599         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5600
5601         if ((!adev->gfx.grbm_soft_reset) &&
5602             (!adev->gfx.srbm_soft_reset))
5603                 return 0;
5604
5605         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5606         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5607
5608         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5609             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5610                 gfx_v8_0_cp_gfx_resume(adev);
5611
5612         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5613             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5614             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5615             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5616                 int i;
5617
5618                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5619                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5620
5621                         gfx_v8_0_init_hqd(adev, ring);
5622                 }
5623                 gfx_v8_0_cp_compute_resume(adev);
5624         }
5625         gfx_v8_0_rlc_start(adev);
5626
5627         return 0;
5628 }
5629
5630 /**
5631  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5632  *
5633  * @adev: amdgpu_device pointer
5634  *
5635  * Fetches a GPU clock counter snapshot.
5636  * Returns the 64 bit clock counter snapshot.
5637  */
5638 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5639 {
5640         uint64_t clock;
5641
5642         mutex_lock(&adev->gfx.gpu_clock_mutex);
5643         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5644         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5645                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5646         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5647         return clock;
5648 }
5649
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocations into the registers looked up via amdgpu_gds_reg_offset.
 * Byte sizes/offsets are converted to hardware units with the
 * AMDGPU_*_SHIFT constants before being written.  Each WRITE_DATA
 * carries: header, engine/dst-sel word, register offset, 0 (high
 * address word), then the value. */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* convert byte quantities into hardware allocation units */
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size and base packed into one register value */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5697
5698 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5699 {
5700         WREG32(mmSQ_IND_INDEX,
5701                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5702                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5703                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5704                 (SQ_IND_INDEX__FORCE_READ_MASK));
5705         return RREG32(mmSQ_IND_DATA);
5706 }
5707
5708 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5709                            uint32_t wave, uint32_t thread,
5710                            uint32_t regno, uint32_t num, uint32_t *out)
5711 {
5712         WREG32(mmSQ_IND_INDEX,
5713                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5714                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5715                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5716                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5717                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5718                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5719         while (num--)
5720                 *(out++) = RREG32(mmSQ_IND_DATA);
5721 }
5722
5723 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5724 {
5725         /* type 0 wave data */
5726         dst[(*no_fields)++] = 0;
5727         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5728         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5729         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5730         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5731         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5732         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5733         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5734         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5735         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5736         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5737         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5738         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5739         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5740         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5741         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5742         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5743         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5744         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5745 }
5746
5747 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5748                                      uint32_t wave, uint32_t start,
5749                                      uint32_t size, uint32_t *dst)
5750 {
5751         wave_read_regs(
5752                 adev, simd, wave, 0,
5753                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5754 }
5755
5756
5757 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5758         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5759         .select_se_sh = &gfx_v8_0_select_se_sh,
5760         .read_wave_data = &gfx_v8_0_read_wave_data,
5761         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5762 };
5763
/* IP-block early_init hook: record the ring counts for GFX8 and
 * install the gfx/ring/irq/gds/rlc function tables before any other
 * hook runs.  Always returns 0. */
static int gfx_v8_0_early_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
        adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
        gfx_v8_0_set_gds_init(adev);
        gfx_v8_0_set_rlc_funcs(adev);

        return 0;
}
5778
/* IP-block late_init hook: enable the privileged register/instruction
 * fault interrupts, run the EDC GPR workarounds (deferred to late init
 * because they submit IBs, which need the IB pool), and gate GFX
 * power.  Returns 0 on success or the first failing call's error
 * code. */
static int gfx_v8_0_late_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
        if (r)
                return r;

        r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
        if (r)
                return r;

        /* requires IBs so do in late init after IB pool is initialized */
        r = gfx_v8_0_do_edc_gpr_workarounds(adev);
        if (r)
                return r;

        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

        return 0;
}
5802
5803 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5804                                                        bool enable)
5805 {
5806         if ((adev->asic_type == CHIP_POLARIS11) ||
5807             (adev->asic_type == CHIP_POLARIS12))
5808                 /* Send msg to SMU via Powerplay */
5809                 amdgpu_set_powergating_state(adev,
5810                                              AMD_IP_BLOCK_TYPE_SMC,
5811                                              enable ?
5812                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5813
5814         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5815 }
5816
/* Toggle dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
                                                        bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5822
/* Toggle "quick" medium-grain power gating (Polaris11-family feature)
 * via RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5828
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney path) via
 * RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
                                          bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5834
/* Toggle GFX pipeline power gating (Carrizo/Stoney path) via
 * RLC_PG_CNTL.  When disabling, the dummy register read below forces
 * the GFX block awake before callers touch further GFX registers. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
        WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

        /* Read any GFX register to wake up GFX. */
        if (!enable)
                RREG32(mmDB_RENDER_CONTROL);
}
5844
5845 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5846                                           bool enable)
5847 {
5848         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5849                 cz_enable_gfx_cg_power_gating(adev, true);
5850                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5851                         cz_enable_gfx_pipeline_power_gating(adev, true);
5852         } else {
5853                 cz_enable_gfx_cg_power_gating(adev, false);
5854                 cz_enable_gfx_pipeline_power_gating(adev, false);
5855         }
5856 }
5857
5858 static int gfx_v8_0_set_powergating_state(void *handle,
5859                                           enum amd_powergating_state state)
5860 {
5861         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5862         bool enable = (state == AMD_PG_STATE_GATE);
5863
5864         if (amdgpu_sriov_vf(adev))
5865                 return 0;
5866
5867         switch (adev->asic_type) {
5868         case CHIP_CARRIZO:
5869         case CHIP_STONEY:
5870
5871                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5872                         cz_enable_sck_slow_down_on_power_up(adev, true);
5873                         cz_enable_sck_slow_down_on_power_down(adev, true);
5874                 } else {
5875                         cz_enable_sck_slow_down_on_power_up(adev, false);
5876                         cz_enable_sck_slow_down_on_power_down(adev, false);
5877                 }
5878                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5879                         cz_enable_cp_power_gating(adev, true);
5880                 else
5881                         cz_enable_cp_power_gating(adev, false);
5882
5883                 cz_update_gfx_cg_power_gating(adev, enable);
5884
5885                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5886                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5887                 else
5888                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5889
5890                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5891                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5892                 else
5893                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5894                 break;
5895         case CHIP_POLARIS11:
5896         case CHIP_POLARIS12:
5897                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5898                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5899                 else
5900                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5901
5902                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5903                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5904                 else
5905                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5906
5907                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5908                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5909                 else
5910                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5911                 break;
5912         default:
5913                 break;
5914         }
5915
5916         return 0;
5917 }
5918
/* IP-block get_clockgating_state hook: report which GFX clock-gating
 * features are currently active by sampling the relevant registers and
 * OR-ing AMD_CG_SUPPORT_GFX_* bits into *flags.
 *
 * NOTE(review): under SR-IOV *flags is zeroed but the function still
 * falls through and reads the registers below -- confirm whether an
 * early return was intended. */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int data;

        if (amdgpu_sriov_vf(adev))
                *flags = 0;

        /* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
        data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
        if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_MGCG;

        /* AMD_CG_SUPPORT_GFX_CGCG */
        data = RREG32(mmRLC_CGCG_CGLS_CTRL);
        if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGCG;

        /* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
        if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CGLS;

        /* AMD_CG_SUPPORT_GFX_CGTS: active when the SM override is clear */
        data = RREG32(mmCGTS_SM_CTRL_REG);
        if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS;

        /* AMD_CG_SUPPORT_GFX_CGTS_LS */
        if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
                *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

        /* AMD_CG_SUPPORT_GFX_RLC_LS (RLC memory light sleep implies MGLS) */
        data = RREG32(mmRLC_MEM_SLP_CNTL);
        if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

        /* AMD_CG_SUPPORT_GFX_CP_LS (CP memory light sleep implies MGLS) */
        data = RREG32(mmCP_MEM_SLP_CNTL);
        if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
                *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5960
/* Broadcast a BPM command (@cmd) targeting serdes register @reg_addr
 * to all CUs and non-CU masters via the RLC serdes write interface.
 * Selects all SEs/SHs and sets both master masks to all-ones so every
 * unit receives the command, then rewrites RLC_SERDES_WR_CTRL with the
 * command/address fields.  Stoney clears a smaller set of control bits
 * than the other ASICs -- presumably its BPM_DATA/REG_ADDR fields must
 * be left untouched; confirm against the RLC docs. */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
                                     uint32_t reg_addr, uint32_t cmd)
{
        uint32_t data;

        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

        data = RREG32(mmRLC_SERDES_WR_CTRL);
        if (adev->asic_type == CHIP_STONEY)
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        else
                data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
                          RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
                          RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
                          RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
                          RLC_SERDES_WR_CTRL__POWER_UP_MASK |
                          RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
                          RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
                          RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
                          RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
        /* install the command, target register, and broadcast BPM address */
        data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
                 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
                 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
                 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

        WREG32(mmRLC_SERDES_WR_CTRL, data);
}
6001
/* RLC safe-mode handshake: message IDs and the locally-defined
 * RLC_GPR_REG2 request/message field layout used by the safe-mode
 * entry/exit helpers below. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
6008
/* Ask the RLC firmware to enter safe mode so clock-gating registers
 * can be reprogrammed.  No-op if the RLC is not running or neither
 * CGCG nor MGCG is enabled.  Writes CMD + message 1 to RLC_SAFE_MODE,
 * then polls (bounded by adev->usec_timeout) for the GFX clock/power
 * status bits and for the CMD bit to clear, and records the state in
 * adev->gfx.rlc.in_safe_mode. */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                /* CMD=1, MESSAGE=1: request safe-mode entry */
                data |= RLC_SAFE_MODE__CMD_MASK;
                data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
                WREG32(mmRLC_SAFE_MODE, data);

                /* wait for GFX clocks and power to report on */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if ((RREG32(mmRLC_GPM_STAT) &
                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
                            (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
                             RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
                                break;
                        udelay(1);
                }

                /* wait for the RLC to acknowledge by clearing CMD */
                for (i = 0; i < adev->usec_timeout; i++) {
                        if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                                break;
                        udelay(1);
                }
                adev->gfx.rlc.in_safe_mode = true;
        }
}
6042
/* Counterpart of iceland_enter_rlc_safe_mode(): request safe-mode exit
 * (CMD=1, MESSAGE=0) if we previously entered it, clear the recorded
 * state, then poll for the RLC to acknowledge by clearing CMD.  No-op
 * if the RLC is not running. */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        u32 data = 0;
        unsigned i;

        data = RREG32(mmRLC_CNTL);
        if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
                return;

        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->gfx.rlc.in_safe_mode) {
                        /* CMD=1, MESSAGE=0: request safe-mode exit */
                        data |= RLC_SAFE_MODE__CMD_MASK;
                        data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
                        WREG32(mmRLC_SAFE_MODE, data);
                        adev->gfx.rlc.in_safe_mode = false;
                }
        }

        for (i = 0; i < adev->usec_timeout; i++) {
                if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
                        break;
                udelay(1);
        }
}
6067
/* RLC safe-mode hooks installed via gfx_v8_0_set_rlc_funcs(). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
        .enter_safe_mode = iceland_enter_rlc_safe_mode,
        .exit_safe_mode = iceland_exit_rlc_safe_mode
};
6072
/* Enable or disable medium-grain clock gating (MGCG) and the related
 * light-sleep features (RLC/CP MGLS, CGTS), bracketed by RLC safe
 * mode.  The numbered comments follow the programming sequence: clear
 * or set the MGCG override bits, wait for the serdes masters to idle,
 * then push the override command to the CUs via the serdes interface.
 * When disabling, the order is reversed: overrides are forced on and
 * light sleep is turned off before the serdes command is sent. */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory Light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        /* 2 - CP memory Light sleep */
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the override bits so MGCG
                 * can engage (APUs keep the GRBM override untouched) */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6176
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle coarse grain clock
 * gating (CGCG) and coarse grain light sleep (CGLS) for the GFX block.
 *
 * @adev: amdgpu device pointer
 * @enable: request gating on (honored only if AMD_CG_SUPPORT_GFX_CGCG is
 *          set in adev->cg_flags) or off
 *
 * The whole sequence runs inside RLC safe mode.  The RLC serdes masters
 * are polled idle between steps so that each serdes command has completed
 * before the next register change is made.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	/* cache RLC_CGCG_CGLS_CTRL; written back further down only on change */
	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override bit */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls: also drop the CGLS override bit */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG: force both CGCG and CGLS override bits on */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (repeated reads, original
		 * sequence kept as-is) */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to set CGCG override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6267 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6268                                             bool enable)
6269 {
6270         if (enable) {
6271                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6272                  * ===  MGCG + MGLS + TS(CG/LS) ===
6273                  */
6274                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6275                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6276         } else {
6277                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6278                  * ===  CGCG + CGLS ===
6279                  */
6280                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6281                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6282         }
6283         return 0;
6284 }
6285
6286 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6287                                           enum amd_clockgating_state state)
6288 {
6289         uint32_t msg_id, pp_state = 0;
6290         uint32_t pp_support_state = 0;
6291         void *pp_handle = adev->powerplay.pp_handle;
6292
6293         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6294                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6295                         pp_support_state = PP_STATE_SUPPORT_LS;
6296                         pp_state = PP_STATE_LS;
6297                 }
6298                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6299                         pp_support_state |= PP_STATE_SUPPORT_CG;
6300                         pp_state |= PP_STATE_CG;
6301                 }
6302                 if (state == AMD_CG_STATE_UNGATE)
6303                         pp_state = 0;
6304
6305                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6306                                 PP_BLOCK_GFX_CG,
6307                                 pp_support_state,
6308                                 pp_state);
6309                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6310         }
6311
6312         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6313                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6314                         pp_support_state = PP_STATE_SUPPORT_LS;
6315                         pp_state = PP_STATE_LS;
6316                 }
6317
6318                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6319                         pp_support_state |= PP_STATE_SUPPORT_CG;
6320                         pp_state |= PP_STATE_CG;
6321                 }
6322
6323                 if (state == AMD_CG_STATE_UNGATE)
6324                         pp_state = 0;
6325
6326                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6327                                 PP_BLOCK_GFX_MG,
6328                                 pp_support_state,
6329                                 pp_state);
6330                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6331         }
6332
6333         return 0;
6334 }
6335
6336 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6337                                           enum amd_clockgating_state state)
6338 {
6339
6340         uint32_t msg_id, pp_state = 0;
6341         uint32_t pp_support_state = 0;
6342         void *pp_handle = adev->powerplay.pp_handle;
6343
6344         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6345                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6346                         pp_support_state = PP_STATE_SUPPORT_LS;
6347                         pp_state = PP_STATE_LS;
6348                 }
6349                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6350                         pp_support_state |= PP_STATE_SUPPORT_CG;
6351                         pp_state |= PP_STATE_CG;
6352                 }
6353                 if (state == AMD_CG_STATE_UNGATE)
6354                         pp_state = 0;
6355
6356                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6357                                 PP_BLOCK_GFX_CG,
6358                                 pp_support_state,
6359                                 pp_state);
6360                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6361         }
6362
6363         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6364                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6365                         pp_support_state = PP_STATE_SUPPORT_LS;
6366                         pp_state = PP_STATE_LS;
6367                 }
6368                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6369                         pp_support_state |= PP_STATE_SUPPORT_CG;
6370                         pp_state |= PP_STATE_CG;
6371                 }
6372                 if (state == AMD_CG_STATE_UNGATE)
6373                         pp_state = 0;
6374
6375                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6376                                 PP_BLOCK_GFX_3D,
6377                                 pp_support_state,
6378                                 pp_state);
6379                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6380         }
6381
6382         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6383                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6384                         pp_support_state = PP_STATE_SUPPORT_LS;
6385                         pp_state = PP_STATE_LS;
6386                 }
6387
6388                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6389                         pp_support_state |= PP_STATE_SUPPORT_CG;
6390                         pp_state |= PP_STATE_CG;
6391                 }
6392
6393                 if (state == AMD_CG_STATE_UNGATE)
6394                         pp_state = 0;
6395
6396                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6397                                 PP_BLOCK_GFX_MG,
6398                                 pp_support_state,
6399                                 pp_state);
6400                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6401         }
6402
6403         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6404                 pp_support_state = PP_STATE_SUPPORT_LS;
6405
6406                 if (state == AMD_CG_STATE_UNGATE)
6407                         pp_state = 0;
6408                 else
6409                         pp_state = PP_STATE_LS;
6410
6411                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6412                                 PP_BLOCK_GFX_RLC,
6413                                 pp_support_state,
6414                                 pp_state);
6415                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6416         }
6417
6418         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6419                 pp_support_state = PP_STATE_SUPPORT_LS;
6420
6421                 if (state == AMD_CG_STATE_UNGATE)
6422                         pp_state = 0;
6423                 else
6424                         pp_state = PP_STATE_LS;
6425                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6426                         PP_BLOCK_GFX_CP,
6427                         pp_support_state,
6428                         pp_state);
6429                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6430         }
6431
6432         return 0;
6433 }
6434
6435 static int gfx_v8_0_set_clockgating_state(void *handle,
6436                                           enum amd_clockgating_state state)
6437 {
6438         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6439
6440         if (amdgpu_sriov_vf(adev))
6441                 return 0;
6442
6443         switch (adev->asic_type) {
6444         case CHIP_FIJI:
6445         case CHIP_CARRIZO:
6446         case CHIP_STONEY:
6447                 gfx_v8_0_update_gfx_clock_gating(adev,
6448                                                  state == AMD_CG_STATE_GATE);
6449                 break;
6450         case CHIP_TONGA:
6451                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6452                 break;
6453         case CHIP_POLARIS10:
6454         case CHIP_POLARIS11:
6455         case CHIP_POLARIS12:
6456                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6457                 break;
6458         default:
6459                 break;
6460         }
6461         return 0;
6462 }
6463
6464 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6465 {
6466         return ring->adev->wb.wb[ring->rptr_offs];
6467 }
6468
6469 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6470 {
6471         struct amdgpu_device *adev = ring->adev;
6472
6473         if (ring->use_doorbell)
6474                 /* XXX check if swapping is necessary on BE */
6475                 return ring->adev->wb.wb[ring->wptr_offs];
6476         else
6477                 return RREG32(mmCP_RB0_WPTR);
6478 }
6479
6480 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6481 {
6482         struct amdgpu_device *adev = ring->adev;
6483
6484         if (ring->use_doorbell) {
6485                 /* XXX check if swapping is necessary on BE */
6486                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6487                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6488         } else {
6489                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6490                 (void)RREG32(mmCP_RB0_WPTR);
6491         }
6492 }
6493
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring.
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE for the per-client bit selected below.  Compute/KIQ
 * rings pick the bit from their MEC (me) and pipe; the gfx ring uses the
 * CP0 bit and executes the wait on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 have HDP-flush done bits here */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask); /* value written to REQ */
	amdgpu_ring_write(ring, ref_and_mask); /* mask compared in DONE */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6526
/*
 * gfx_v8_0_ring_emit_vgt_flush - flush the VGT.
 *
 * Emits two EVENT_WRITE packets: a VS partial flush followed by a
 * VGT flush event.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6537
6538
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - invalidate the HDP cache.
 *
 * Emits a WRITE_DATA packet that writes 1 to HDP_DEBUG0 (with write
 * confirmation) — NOTE(review): per the function name this register
 * write triggers the HDP invalidate on VI; confirm against the HDP
 * register documentation.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6550
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring.
 *
 * @ring: ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VM id the IB executes under (packed into bits 24+ of control)
 * @ctx_switch: unused here
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs plain INDIRECT_BUFFER.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VM id */
	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* byte-swap control for big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* 4-byte aligned address */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6573
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring.
 *
 * @ring: ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VM id the IB executes under (packed into bits 24+ of control)
 * @ctx_switch: unused here
 *
 * Unlike the gfx variant, control also carries INDIRECT_BUFFER_VALID.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* byte-swap control for big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));	/* 4-byte aligned address */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6589
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring.
 *
 * @ring: ring to emit on
 * @addr: GPU address the fence sequence number is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Uses EVENT_WRITE_EOP so the TC/TCL1 caches are flushed and written
 * back before the sequence number lands.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* 4-byte aligned address */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6610
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's last fence.
 *
 * Emits a WAIT_REG_MEM on the ring's fence address until it equals the
 * latest emitted sequence number, so everything previously submitted on
 * the ring has completed.  The wait runs on PFP for gfx rings and on ME
 * for compute rings.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);	/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6627
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM context.
 *
 * @ring: ring to emit on
 * @vm_id: VM context id (0-15); contexts 0-7 and 8-15 live in separate
 *         register banks
 * @pd_addr: page directory base address (written as a 4K page number)
 *
 * Writes the new page-directory base, requests an invalidate for the
 * context's bit, waits for VM_INVALIDATE_REQUEST to read back as 0, and
 * on gfx rings finally syncs PFP to ME and pads with NOPs so the CE
 * cannot run ahead of the flush.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);	/* page-number form */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6676
6677 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6678 {
6679         return ring->adev->wb.wb[ring->wptr_offs];
6680 }
6681
6682 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6683 {
6684         struct amdgpu_device *adev = ring->adev;
6685
6686         /* XXX check if swapping is necessary on BE */
6687         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6688         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6689 }
6690
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring.
 *
 * @ring: ring to emit on
 * @addr: GPU address the fence sequence number is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Compute uses RELEASE_MEM instead of EVENT_WRITE_EOP, with the same
 * TC/TCL1 flush-and-writeback actions.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* 4-byte aligned address */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6711
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring.
 *
 * @ring: KIQ ring
 * @addr: GPU address the (32-bit only) fence sequence is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_INT requests an interrupt; 64-bit fences are
 *         not supported on KIQ (BUG_ON below)
 *
 * Writes the sequence number via WRITE_DATA, then optionally pokes
 * CPC_INT_STATUS to raise the completion interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6736
/* gfx_v8_ring_emit_sb - emit a SWITCH_BUFFER packet on the ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6742
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet.
 *
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* /
 *         AMDGPU_VM_DOMAIN bits controlling which state loads are enabled
 *
 * Builds the load-control dword (dw2) from the flags.  Under SR-IOV, CE
 * metadata is emitted before and DE metadata after the packet, using the
 * CSA address appropriate for the VM domain.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6780
/*
 * gfx_v8_0_ring_emit_rreg - read a register via the ring (SR-IOV path).
 *
 * @ring: ring to emit on
 * @reg: register offset to read
 *
 * Emits a COPY_DATA packet that copies the register value into the
 * writeback slot at adev->virt.reg_val_offs, where the caller can pick
 * it up.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6796
/*
 * gfx_v8_0_ring_emit_wreg - write a register via the ring.
 *
 * @ring: ring to emit on
 * @reg: register offset to write
 * @val: value to write
 *
 * Emits a WRITE_DATA packet targeting the register (no address
 * increment).
 */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6806
/*
 * gfx_v8_0_set_gfx_eop_interrupt_state - enable/disable the gfx ring's
 * end-of-pipe timestamp interrupt via CP_INT_CNTL_RING0.
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6813
6814 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6815                                                      int me, int pipe,
6816                                                      enum amdgpu_interrupt_state state)
6817 {
6818         /*
6819          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6820          * handles the setting of interrupts for this specific pipe. All other
6821          * pipes' interrupts are set by amdkfd.
6822          */
6823
6824         if (me == 1) {
6825                 switch (pipe) {
6826                 case 0:
6827                         break;
6828                 default:
6829                         DRM_DEBUG("invalid pipe %d\n", pipe);
6830                         return;
6831                 }
6832         } else {
6833                 DRM_DEBUG("invalid me %d\n", me);
6834                 return;
6835         }
6836
6837         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6838                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6839 }
6840
/*
 * gfx_v8_0_set_priv_reg_fault_state - enable/disable the privileged
 * register fault interrupt on ring 0.  Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6851
/*
 * gfx_v8_0_set_priv_inst_fault_state - enable/disable the privileged
 * instruction fault interrupt on ring 0.  Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6862
/*
 * gfx_v8_0_set_eop_interrupt_state - route an EOP interrupt state change
 * to the gfx ring or to the matching MEC/pipe handler.
 *
 * @adev: amdgpu device pointer
 * @src: interrupt source (unused here)
 * @type: AMDGPU_CP_IRQ_* identifying gfx or a MECm/PIPEn EOP source
 * @state: requested interrupt state
 *
 * Unknown types are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6901
6902 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6903                             struct amdgpu_irq_src *source,
6904                             struct amdgpu_iv_entry *entry)
6905 {
6906         int i;
6907         u8 me_id, pipe_id, queue_id;
6908         struct amdgpu_ring *ring;
6909
6910         DRM_DEBUG("IH: CP EOP\n");
6911         me_id = (entry->ring_id & 0x0c) >> 2;
6912         pipe_id = (entry->ring_id & 0x03) >> 0;
6913         queue_id = (entry->ring_id & 0x70) >> 4;
6914
6915         switch (me_id) {
6916         case 0:
6917                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6918                 break;
6919         case 1:
6920         case 2:
6921                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6922                         ring = &adev->gfx.compute_ring[i];
6923                         /* Per-queue interrupt is supported for MEC starting from VI.
6924                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6925                           */
6926                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6927                                 amdgpu_fence_process(ring);
6928                 }
6929                 break;
6930         }
6931         return 0;
6932 }
6933
/*
 * Handler for the privileged-register fault interrupt: a command stream
 * touched a privileged register.  Log it and schedule adev->reset_work
 * (the driver's GPU reset worker -- confirm handler against caller).
 */
6934 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6935                                  struct amdgpu_irq_src *source,
6936                                  struct amdgpu_iv_entry *entry)
6937 {
6938         DRM_ERROR("Illegal register access in command stream\n");
6939         schedule_work(&adev->reset_work);
6940         return 0;
6941 }
6942
/*
 * Handler for the privileged-instruction fault interrupt: a command
 * stream issued an illegal instruction.  Log it and schedule
 * adev->reset_work, same recovery path as gfx_v8_0_priv_reg_irq.
 */
6943 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6944                                   struct amdgpu_irq_src *source,
6945                                   struct amdgpu_iv_entry *entry)
6946 {
6947         DRM_ERROR("Illegal instruction in command stream\n");
6948         schedule_work(&adev->reset_work);
6949         return 0;
6950 }
6951
6952 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6953                                             struct amdgpu_irq_src *src,
6954                                             unsigned int type,
6955                                             enum amdgpu_interrupt_state state)
6956 {
6957         uint32_t tmp, target;
6958         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6959
6960         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
6961
6962         if (ring->me == 1)
6963                 target = mmCP_ME1_PIPE0_INT_CNTL;
6964         else
6965                 target = mmCP_ME2_PIPE0_INT_CNTL;
6966         target += ring->pipe;
6967
6968         switch (type) {
6969         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6970                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6971                         tmp = RREG32(mmCPC_INT_CNTL);
6972                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6973                                                  GENERIC2_INT_ENABLE, 0);
6974                         WREG32(mmCPC_INT_CNTL, tmp);
6975
6976                         tmp = RREG32(target);
6977                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6978                                                  GENERIC2_INT_ENABLE, 0);
6979                         WREG32(target, tmp);
6980                 } else {
6981                         tmp = RREG32(mmCPC_INT_CNTL);
6982                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6983                                                  GENERIC2_INT_ENABLE, 1);
6984                         WREG32(mmCPC_INT_CNTL, tmp);
6985
6986                         tmp = RREG32(target);
6987                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6988                                                  GENERIC2_INT_ENABLE, 1);
6989                         WREG32(target, tmp);
6990                 }
6991                 break;
6992         default:
6993                 BUG(); /* kiq only support GENERIC2_INT now */
6994                 break;
6995         }
6996         return 0;
6997 }
6998
/*
 * Handler for the KIQ GENERIC2 interrupt: decode me/pipe/queue from
 * entry->ring_id (for the debug message only) and process fences on
 * the KIQ ring.
 */
6999 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7000                             struct amdgpu_irq_src *source,
7001                             struct amdgpu_iv_entry *entry)
7002 {
7003         u8 me_id, pipe_id, queue_id;
7004         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7005
7006         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
7007
7008         me_id = (entry->ring_id & 0x0c) >> 2;
7009         pipe_id = (entry->ring_id & 0x03) >> 0;
7010         queue_id = (entry->ring_id & 0x70) >> 4;
7011         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7012                    me_id, pipe_id, queue_id);
7013
7014         amdgpu_fence_process(ring);
7015         return 0;
7016 }
7017
/*
 * Common IP-block callbacks wiring GFX 8.0 into the driver lifecycle:
 * init/fini, hw bring-up/teardown, suspend/resume, soft-reset phases,
 * and clock/power gating control.
 */
7018 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7019         .name = "gfx_v8_0",
7020         .early_init = gfx_v8_0_early_init,
7021         .late_init = gfx_v8_0_late_init,
7022         .sw_init = gfx_v8_0_sw_init,
7023         .sw_fini = gfx_v8_0_sw_fini,
7024         .hw_init = gfx_v8_0_hw_init,
7025         .hw_fini = gfx_v8_0_hw_fini,
7026         .suspend = gfx_v8_0_suspend,
7027         .resume = gfx_v8_0_resume,
7028         .is_idle = gfx_v8_0_is_idle,
7029         .wait_for_idle = gfx_v8_0_wait_for_idle,
7030         .check_soft_reset = gfx_v8_0_check_soft_reset,
7031         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7032         .soft_reset = gfx_v8_0_soft_reset,
7033         .post_soft_reset = gfx_v8_0_post_soft_reset,
7034         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7035         .set_powergating_state = gfx_v8_0_set_powergating_state,
7036         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7037 };
7038
/*
 * Ring function table for the gfx (graphics) ring.  emit_frame_size is
 * the worst-case dword budget for one submission; the inline comments
 * give the contribution of each emit helper.
 */
7039 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7040         .type = AMDGPU_RING_TYPE_GFX,
7041         .align_mask = 0xff,
7042         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7043         .support_64bit_ptrs = false,
7044         .get_rptr = gfx_v8_0_ring_get_rptr,
7045         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7046         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7047         .emit_frame_size =
7048                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7049                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7050                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7051                 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7052                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7053                 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
7054                 2 + /* gfx_v8_ring_emit_sb */
7055                 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
7056         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7057         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7058         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7059         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7060         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7061         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7062         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7063         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7064         .test_ring = gfx_v8_0_ring_test_ring,
7065         .test_ib = gfx_v8_0_ring_test_ib,
7066         .insert_nop = amdgpu_ring_insert_nop,
7067         .pad_ib = amdgpu_ring_generic_pad_ib,
7068         .emit_switch_buffer = gfx_v8_ring_emit_sb,
7069         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7070 };
7071
/*
 * Ring function table for the compute (KCQ) rings.  Shares rptr/test
 * helpers with the gfx ring but uses the compute wptr, fence and IB
 * emit variants; no switch-buffer/context-control hooks on compute.
 */
7072 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7073         .type = AMDGPU_RING_TYPE_COMPUTE,
7074         .align_mask = 0xff,
7075         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7076         .support_64bit_ptrs = false,
7077         .get_rptr = gfx_v8_0_ring_get_rptr,
7078         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7079         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7080         .emit_frame_size =
7081                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7082                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7083                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7084                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7085                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7086                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7087         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7088         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7089         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7090         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7091         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7092         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7093         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7094         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7095         .test_ring = gfx_v8_0_ring_test_ring,
7096         .test_ib = gfx_v8_0_ring_test_ib,
7097         .insert_nop = amdgpu_ring_insert_nop,
7098         .pad_ib = amdgpu_ring_generic_pad_ib,
7099 };
7100
/*
 * Ring function table for the KIQ (kernel interface queue).  Mostly the
 * compute set, plus register read/write emission (emit_rreg/emit_wreg)
 * and a KIQ-specific fence; no pipeline-sync/vm-flush/gds hooks.
 */
7101 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7102         .type = AMDGPU_RING_TYPE_KIQ,
7103         .align_mask = 0xff,
7104         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7105         .support_64bit_ptrs = false,
7106         .get_rptr = gfx_v8_0_ring_get_rptr,
7107         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7108         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7109         .emit_frame_size =
7110                 20 + /* gfx_v8_0_ring_emit_gds_switch */
7111                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7112                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7113                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7114                 17 + /* gfx_v8_0_ring_emit_vm_flush */
7115                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7116         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7117         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7118         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7119         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7120         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7121         .test_ring = gfx_v8_0_ring_test_ring,
7122         .test_ib = gfx_v8_0_ring_test_ib,
7123         .insert_nop = amdgpu_ring_insert_nop,
7124         .pad_ib = amdgpu_ring_generic_pad_ib,
7125         .emit_rreg = gfx_v8_0_ring_emit_rreg,
7126         .emit_wreg = gfx_v8_0_ring_emit_wreg,
7127 };
7128
/*
 * Attach the per-ring-type function tables to the KIQ, gfx and compute
 * rings of this device.
 */
7129 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7130 {
7131         int i;
7132
7133         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7134
7135         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7136                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7137
7138         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7139                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7140 }
7141
/*
 * Interrupt source tables: for each GFX interrupt source, .set programs
 * the enable state and .process handles a fired interrupt.
 */
7142 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7143         .set = gfx_v8_0_set_eop_interrupt_state,
7144         .process = gfx_v8_0_eop_irq,
7145 };
7146
7147 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7148         .set = gfx_v8_0_set_priv_reg_fault_state,
7149         .process = gfx_v8_0_priv_reg_irq,
7150 };
7151
7152 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7153         .set = gfx_v8_0_set_priv_inst_fault_state,
7154         .process = gfx_v8_0_priv_inst_irq,
7155 };
7156
7157 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7158         .set = gfx_v8_0_kiq_set_interrupt_state,
7159         .process = gfx_v8_0_kiq_irq,
7160 };
7161
/*
 * Register the GFX interrupt sources with the device: EOP (one type per
 * ring/pipe), privileged register/instruction faults (one type each),
 * and the KIQ interrupt.
 */
7162 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7163 {
7164         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7165         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7166
7167         adev->gfx.priv_reg_irq.num_types = 1;
7168         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7169
7170         adev->gfx.priv_inst_irq.num_types = 1;
7171         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7172
7173         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7174         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7175 }
7176
/*
 * Point the RLC hooks at the Iceland-named RLC implementation
 * (presumably shared by all gfx v8 parts; defined earlier in this file).
 */
7177 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7178 {
7179         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7180 }
7181
/*
 * Initialize GDS (global data share) partition sizes.  GWS and OA
 * totals are fixed; the mem/gws/oa splits between the gfx and CS
 * partitions depend on whether the part reports a 64KB GDS.
 */
7182 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7183 {
7184         /* init asic gds info */
7185         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7186         adev->gds.gws.total_size = 64;
7187         adev->gds.oa.total_size = 16;
7188
7189         if (adev->gds.mem.total_size == 64 * 1024) {
7190                 adev->gds.mem.gfx_partition_size = 4096;
7191                 adev->gds.mem.cs_partition_size = 4096;
7192
7193                 adev->gds.gws.gfx_partition_size = 4;
7194                 adev->gds.gws.cs_partition_size = 4;
7195
7196                 adev->gds.oa.gfx_partition_size = 4;
7197                 adev->gds.oa.cs_partition_size = 1;
7198         } else {
7199                 adev->gds.mem.gfx_partition_size = 1024;
7200                 adev->gds.mem.cs_partition_size = 1024;
7201
7202                 adev->gds.gws.gfx_partition_size = 16;
7203                 adev->gds.gws.cs_partition_size = 16;
7204
7205                 adev->gds.oa.gfx_partition_size = 4;
7206                 adev->gds.oa.cs_partition_size = 4;
7207         }
7208 }
7209
7210 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7211                                                  u32 bitmap)
7212 {
7213         u32 data;
7214
7215         if (!bitmap)
7216                 return;
7217
7218         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7219         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7220
7221         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7222 }
7223
7224 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7225 {
7226         u32 data, mask;
7227
7228         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7229                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7230
7231         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7232
7233         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7234 }
7235
/*
 * Populate adev->gfx.cu_info: per-SE/per-SH active-CU bitmaps, the
 * total active CU count, and ao_cu_mask built from the first two active
 * CUs of each SH (presumably the "always on" set -- confirm against the
 * powerplay consumer).  User CU-disable masks are applied before the
 * bitmaps are read, but only for the first 4 SEs / 2 SHs that
 * amdgpu_gfx_parse_disable_cu covers.  Takes grbm_idx_mutex because it
 * reprograms the per-SE/SH GRBM index while iterating.
 */
7236 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7237 {
7238         int i, j, k, counter, active_cu_number = 0;
7239         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7240         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7241         unsigned disable_masks[4 * 2];
7242
7243         memset(cu_info, 0, sizeof(*cu_info));
7244
7245         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7246
7247         mutex_lock(&adev->grbm_idx_mutex);
7248         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7249                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7250                         mask = 1;
7251                         ao_bitmap = 0;
7252                         counter = 0;
7253                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7254                         if (i < 4 && j < 2)
7255                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7256                                         adev, disable_masks[i * 2 + j]);
7257                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7258                         cu_info->bitmap[i][j] = bitmap;
7259
7260                         for (k = 0; k < 16; k ++) {
7261                                 if (bitmap & mask) {
7262                                         if (counter < 2) /* first two active CUs per SH */
7263                                                 ao_bitmap |= mask;
7264                                         counter ++;
7265                                 }
7266                                 mask <<= 1;
7267                         }
7268                         active_cu_number += counter;
7269                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7270                 }
7271         }
7272         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7273         mutex_unlock(&adev->grbm_idx_mutex);
7274
7275         cu_info->number = active_cu_number;
7276         cu_info->ao_cu_mask = ao_cu_mask;
7277 }
7278
/*
 * Exported IP block descriptors.  8.0 and 8.1 differ only in the minor
 * version number and share the same gfx_v8_0_ip_funcs implementation.
 */
7279 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7280 {
7281         .type = AMD_IP_BLOCK_TYPE_GFX,
7282         .major = 8,
7283         .minor = 0,
7284         .rev = 0,
7285         .funcs = &gfx_v8_0_ip_funcs,
7286 };
7287
7288 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7289 {
7290         .type = AMD_IP_BLOCK_TYPE_GFX,
7291         .major = 8,
7292         .minor = 1,
7293         .rev = 0,
7294         .funcs = &gfx_v8_0_ip_funcs,
7295 };
7296
/*
 * Emit a WRITE_DATA packet on the CE engine (ENGINE_SEL(2)) that
 * zero-initializes the CE meta-data payload inside the CSA at
 * @csa_addr.  The payload layout -- and therefore the packet's dword
 * count -- depends on whether the virtualization layer reports
 * chained-IB support.
 *
 * NOTE(review): ce_payload is a function-local static union, used only
 * as a zero-filled source buffer here; it is never written, so sharing
 * one copy is safe as long as that stays true.
 */
7297 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7298 {
7299         uint64_t ce_payload_addr;
7300         int cnt_ce;
7301         static union {
7302                 struct vi_ce_ib_state regular;
7303                 struct vi_ce_ib_state_chained_ib chained;
7304         } ce_payload = {};
7305
7306         if (ring->adev->virt.chained_ib_support) {
7307                 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7308                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7309         } else {
7310                 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
7311                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7312         }
7313
7314         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7315         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7316                                 WRITE_DATA_DST_SEL(8) |
7317                                 WR_CONFIRM) |
7318                                 WRITE_DATA_CACHE_POLICY(0));
7319         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7320         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7321         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7322 }
7323
/*
 * Emit a WRITE_DATA packet on the DE engine (ENGINE_SEL(1)) that
 * initializes the DE meta-data payload inside the CSA at @csa_addr.
 * Only the GDS backup address fields are populated (pointing at
 * csa_addr + 4096); the rest of the payload is zero.  Layout and dword
 * count depend on chained-IB support, as in the CE variant above.
 *
 * NOTE(review): unlike the CE payload, this function-local static
 * union IS written (gds_backup_addrlo/hi), so concurrent emission from
 * multiple rings would race on it -- presumably only the single gfx
 * ring calls this; confirm against callers.
 */
7324 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7325 {
7326         uint64_t de_payload_addr, gds_addr;
7327         int cnt_de;
7328         static union {
7329                 struct vi_de_ib_state regular;
7330                 struct vi_de_ib_state_chained_ib chained;
7331         } de_payload = {};
7332
7333         gds_addr = csa_addr + 4096;
7334         if (ring->adev->virt.chained_ib_support) {
7335                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7336                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7337                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7338                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7339         } else {
7340                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7341                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7342                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7343                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7344         }
7345
7346         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7347         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7348                                 WRITE_DATA_DST_SEL(8) |
7349                                 WR_CONFIRM) |
7350                                 WRITE_DATA_CACHE_POLICY(0));
7351         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7352         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7353         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7354 }
7355
7356 /* create MQD for each compute queue */
7357 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7358 {
7359         struct amdgpu_ring *ring = NULL;
7360         int r, i;
7361
7362         /* create MQD for KIQ */
7363         ring = &adev->gfx.kiq.ring;
7364         if (!ring->mqd_obj) {
7365                 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7366                                             AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7367                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
7368                 if (r) {
7369                         dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7370                         return r;
7371                 }
7372
7373                 /* prepare MQD backup */
7374                 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7375                 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7376                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7377         }
7378
7379         /* create MQD for each KCQ */
7380         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7381                 ring = &adev->gfx.compute_ring[i];
7382                 if (!ring->mqd_obj) {
7383                         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7384                                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7385                                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
7386                         if (r) {
7387                                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7388                                 return r;
7389                         }
7390
7391                         /* prepare MQD backup */
7392                         adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7393                         if (!adev->gfx.mec.mqd_backup[i])
7394                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7395                 }
7396         }
7397
7398         return 0;
7399 }
7400
/*
 * Tear down everything gfx_v8_0_compute_mqd_sw_init created: free the
 * MQD backup buffer and the MQD BO of every compute ring, then the KIQ
 * slot (index AMDGPU_MAX_COMPUTE_RINGS).  kfree(NULL) and freeing an
 * absent BO are safe, so partially initialized state is handled.
 */
7401 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7402 {
7403         struct amdgpu_ring *ring = NULL;
7404         int i;
7405
7406         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7407                 ring = &adev->gfx.compute_ring[i];
7408                 kfree(adev->gfx.mec.mqd_backup[i]);
7409                 amdgpu_bo_free_kernel(&ring->mqd_obj,
7410                                       &ring->mqd_gpu_addr,
7411                                       &ring->mqd_ptr);
7412         }
7413
7414         ring = &adev->gfx.kiq.ring;
7415         kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7416         amdgpu_bo_free_kernel(&ring->mqd_obj,
7417                               &ring->mqd_gpu_addr,
7418                               &ring->mqd_ptr);
7419 }