/*
 * Source: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (Linux kernel, amdgpu driver)
 * captured from a gitweb mirror at commit e0fa0d30e162a0fbe4fa24821b4b0765920ba5a4.
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Number of GFX (graphics) and compute rings this IP block exposes. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC "golden" values for mmGB_ADDR_CONFIG (see the *_golden_common_all
 * tables below, which program the same values). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODEn / GB_MACROTILE_MODEn register values:
 * each shifts a raw field value into its bit position. */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-client override bits in mmRLC_CGTT_MGCG_OVERRIDE.
 * NOTE(review): these look like they duplicate masks that may also exist in
 * the generated register headers — confirm before relying on either copy. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes for BPM serdes writes. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

/* Length (in dwords) of the RLC "direct register list" format entry. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images required by this IP block, declared per ASIC so that
 * module tooling (e.g. initramfs generators) can find them: CE, PFP, ME,
 * MEC (and MEC2 where present), and RLC microcode.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * GDS register offsets, one row per VMID (0-15): the {BASE, SIZE, GWS, OA}
 * registers that partition GDS memory, global wave sync and ordered-append
 * resources for that VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/*
 * Tonga A11 golden register settings.  Rows are {register offset, mask,
 * value} triplets; presumably consumed by amdgpu_program_register_sequence()
 * from gfx_v8_0_init_golden_registers() (call visible below) — verify
 * against that helper's read-modify-write semantics.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/*
 * Tonga common golden settings ({offset, mask, value} triplets): raster
 * config, GB_ADDR_CONFIG (matches TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI
 * resource reservation.
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/*
 * Tonga MGCG/CGCG (medium-/coarse-grain clock gating) init sequence,
 * {offset, mask, value} triplets.  mmGRBM_GFX_INDEX is written twice
 * (value 0xe0000000) — presumably to (re)select broadcast indexing around
 * the per-CU CGTS writes; confirm against the GRBM_GFX_INDEX field layout.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU (CU0..CU7) CGTS clock-gating setup. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/*
 * Polaris11 A11 golden register settings ({offset, mask, value} triplets).
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/*
 * Polaris11 common golden settings; mmGB_ADDR_CONFIG value matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/*
 * Polaris10 A11 golden register settings ({offset, mask, value} triplets).
 * Note: unlike Polaris11, this list also touches mmATC_MISC_CG and leaves
 * mmTCP_CHAN_STEER_LO alone.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/*
 * Polaris10 common golden settings: raster config, GB_ADDR_CONFIG and SPI
 * resource reservation ({offset, mask, value} triplets).
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/*
 * Fiji common golden settings ({offset, mask, value} triplets).
 * mmGRBM_GFX_INDEX appears twice — presumably re-selecting broadcast mode
 * before the final mmSPI_CONFIG_CNTL_1 write; confirm.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/*
 * Fiji A10 golden register settings ({offset, mask, value} triplets).
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triplets).  Same shape as the Tonga list but without the per-CU CGTS
 * entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/*
 * Iceland (Topaz) A11 golden register settings ({offset, mask, value}
 * triplets).
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/*
 * Iceland common golden settings; mmGB_ADDR_CONFIG value matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/*
 * Iceland MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triplets).  Covers CU0..CU5 only (fewer CUs than Tonga/Carrizo lists),
 * and CU0/CU4 use distinct TA_SQC values (0x0f840f87).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU (CU0..CU5) CGTS clock-gating setup. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/*
 * Carrizo (CZ) A11 golden register settings ({offset, mask, value}
 * triplets).
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/*
 * Carrizo common golden settings; mmGB_ADDR_CONFIG value matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triplets), CU0..CU7.  Final mmRLC_CGCG_CGLS_CTRL value is 0x0020003f
 * (vs. 0x0020003c on Tonga/Fiji).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU (CU0..CU7) CGTS clock-gating setup. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/*
 * Stoney A11 golden register settings ({offset, mask, value} triplets).
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden registers (raster config, CU reservations);
 * applied after stoney_golden_settings_a11 in
 * gfx_v8_0_init_golden_registers(). Same {register, mask, value} layout.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain / coarse-grain clockgating init values;
 * programmed first for CHIP_STONEY in gfx_v8_0_init_golden_registers().
 * Same {register, mask, value} layout as the other golden tables.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
/* Forward declarations for helpers and callback installers that are
 * defined later in this file but referenced earlier (e.g. from the IP
 * block init paths).
 */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
664
/* gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values.
 *
 * Applies the ASIC-specific register override tables (clockgating init,
 * a11/a10 settings, common settings) via amdgpu_program_register_sequence().
 * The per-chip table order is deliberate; do not reorder the calls.
 * Unknown ASIC types are silently ignored (default: break).
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: a few Polaris10 boards (matched by
		 * PCI revision + subsystem IDs) need extra I2C transactions;
		 * presumably a VBIOS/board-controller fixup — the magic bytes
		 * come from the board vendors and must not be changed.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
753
754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
755 {
756         adev->gfx.scratch.num_reg = 7;
757         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
758         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
759 }
760
761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
762 {
763         struct amdgpu_device *adev = ring->adev;
764         uint32_t scratch;
765         uint32_t tmp = 0;
766         unsigned i;
767         int r;
768
769         r = amdgpu_gfx_scratch_get(adev, &scratch);
770         if (r) {
771                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
772                 return r;
773         }
774         WREG32(scratch, 0xCAFEDEAD);
775         r = amdgpu_ring_alloc(ring, 3);
776         if (r) {
777                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
778                           ring->idx, r);
779                 amdgpu_gfx_scratch_free(adev, scratch);
780                 return r;
781         }
782         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
783         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
784         amdgpu_ring_write(ring, 0xDEADBEEF);
785         amdgpu_ring_commit(ring);
786
787         for (i = 0; i < adev->usec_timeout; i++) {
788                 tmp = RREG32(scratch);
789                 if (tmp == 0xDEADBEEF)
790                         break;
791                 DRM_UDELAY(1);
792         }
793         if (i < adev->usec_timeout) {
794                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
795                          ring->idx, i);
796         } else {
797                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
798                           ring->idx, scratch, tmp);
799                 r = -EINVAL;
800         }
801         amdgpu_gfx_scratch_free(adev, scratch);
802         return r;
803 }
804
805 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
806 {
807         struct amdgpu_device *adev = ring->adev;
808         struct amdgpu_ib ib;
809         struct dma_fence *f = NULL;
810         uint32_t scratch;
811         uint32_t tmp = 0;
812         long r;
813
814         r = amdgpu_gfx_scratch_get(adev, &scratch);
815         if (r) {
816                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
817                 return r;
818         }
819         WREG32(scratch, 0xCAFEDEAD);
820         memset(&ib, 0, sizeof(ib));
821         r = amdgpu_ib_get(adev, NULL, 256, &ib);
822         if (r) {
823                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
824                 goto err1;
825         }
826         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
827         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
828         ib.ptr[2] = 0xDEADBEEF;
829         ib.length_dw = 3;
830
831         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
832         if (r)
833                 goto err2;
834
835         r = dma_fence_wait_timeout(f, false, timeout);
836         if (r == 0) {
837                 DRM_ERROR("amdgpu: IB test timed out.\n");
838                 r = -ETIMEDOUT;
839                 goto err2;
840         } else if (r < 0) {
841                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
842                 goto err2;
843         }
844         tmp = RREG32(scratch);
845         if (tmp == 0xDEADBEEF) {
846                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
847                 r = 0;
848         } else {
849                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
850                           scratch, tmp);
851                 r = -EINVAL;
852         }
853 err2:
854         amdgpu_ib_free(adev, &ib, NULL);
855         dma_fence_put(f);
856 err1:
857         amdgpu_gfx_scratch_free(adev, scratch);
858         return r;
859 }
860
861
862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
863         release_firmware(adev->gfx.pfp_fw);
864         adev->gfx.pfp_fw = NULL;
865         release_firmware(adev->gfx.me_fw);
866         adev->gfx.me_fw = NULL;
867         release_firmware(adev->gfx.ce_fw);
868         adev->gfx.ce_fw = NULL;
869         release_firmware(adev->gfx.rlc_fw);
870         adev->gfx.rlc_fw = NULL;
871         release_firmware(adev->gfx.mec_fw);
872         adev->gfx.mec_fw = NULL;
873         if ((adev->asic_type != CHIP_STONEY) &&
874             (adev->asic_type != CHIP_TOPAZ))
875                 release_firmware(adev->gfx.mec2_fw);
876         adev->gfx.mec2_fw = NULL;
877
878         kfree(adev->gfx.rlc.register_list_format);
879 }
880
881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
882 {
883         const char *chip_name;
884         char fw_name[30];
885         int err;
886         struct amdgpu_firmware_info *info = NULL;
887         const struct common_firmware_header *header = NULL;
888         const struct gfx_firmware_header_v1_0 *cp_hdr;
889         const struct rlc_firmware_header_v2_0 *rlc_hdr;
890         unsigned int *tmp = NULL, i;
891
892         DRM_DEBUG("\n");
893
894         switch (adev->asic_type) {
895         case CHIP_TOPAZ:
896                 chip_name = "topaz";
897                 break;
898         case CHIP_TONGA:
899                 chip_name = "tonga";
900                 break;
901         case CHIP_CARRIZO:
902                 chip_name = "carrizo";
903                 break;
904         case CHIP_FIJI:
905                 chip_name = "fiji";
906                 break;
907         case CHIP_POLARIS11:
908                 chip_name = "polaris11";
909                 break;
910         case CHIP_POLARIS10:
911                 chip_name = "polaris10";
912                 break;
913         case CHIP_POLARIS12:
914                 chip_name = "polaris12";
915                 break;
916         case CHIP_STONEY:
917                 chip_name = "stoney";
918                 break;
919         default:
920                 BUG();
921         }
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
924         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
931         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
935         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.me_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
942         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943
944         /* chain ib ucode isn't formal released, just disable it by far
945          * TODO: when ucod ready we should use ucode version to judge if
946          * chain-ib support or not.
947          */
948         adev->virt.chained_ib_support = false;
949
950         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
951
952         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
953         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
954         if (err)
955                 goto out;
956         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
957         if (err)
958                 goto out;
959         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
960         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
961         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
962
963         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
964         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
965         if (err)
966                 goto out;
967         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
968         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
969         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
970         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
971
972         adev->gfx.rlc.save_and_restore_offset =
973                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
974         adev->gfx.rlc.clear_state_descriptor_offset =
975                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
976         adev->gfx.rlc.avail_scratch_ram_locations =
977                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
978         adev->gfx.rlc.reg_restore_list_size =
979                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
980         adev->gfx.rlc.reg_list_format_start =
981                         le32_to_cpu(rlc_hdr->reg_list_format_start);
982         adev->gfx.rlc.reg_list_format_separate_start =
983                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
984         adev->gfx.rlc.starting_offsets_start =
985                         le32_to_cpu(rlc_hdr->starting_offsets_start);
986         adev->gfx.rlc.reg_list_format_size_bytes =
987                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
988         adev->gfx.rlc.reg_list_size_bytes =
989                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
990
991         adev->gfx.rlc.register_list_format =
992                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
993                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
994
995         if (!adev->gfx.rlc.register_list_format) {
996                 err = -ENOMEM;
997                 goto out;
998         }
999
1000         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1001                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1002         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1003                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1004
1005         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1006
1007         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1008                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1009         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1010                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1011
1012         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1013         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1014         if (err)
1015                 goto out;
1016         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1017         if (err)
1018                 goto out;
1019         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1020         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1022
1023         if ((adev->asic_type != CHIP_STONEY) &&
1024             (adev->asic_type != CHIP_TOPAZ)) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1027                 if (!err) {
1028                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1029                         if (err)
1030                                 goto out;
1031                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1032                                 adev->gfx.mec2_fw->data;
1033                         adev->gfx.mec2_fw_version =
1034                                 le32_to_cpu(cp_hdr->header.ucode_version);
1035                         adev->gfx.mec2_feature_version =
1036                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1037                 } else {
1038                         err = 0;
1039                         adev->gfx.mec2_fw = NULL;
1040                 }
1041         }
1042
1043         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1045                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1046                 info->fw = adev->gfx.pfp_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1053                 info->fw = adev->gfx.me_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1059                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1060                 info->fw = adev->gfx.ce_fw;
1061                 header = (const struct common_firmware_header *)info->fw->data;
1062                 adev->firmware.fw_size +=
1063                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1064
1065                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1066                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1067                 info->fw = adev->gfx.rlc_fw;
1068                 header = (const struct common_firmware_header *)info->fw->data;
1069                 adev->firmware.fw_size +=
1070                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1071
1072                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1073                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1074                 info->fw = adev->gfx.mec_fw;
1075                 header = (const struct common_firmware_header *)info->fw->data;
1076                 adev->firmware.fw_size +=
1077                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1078
1079                 /* we need account JT in */
1080                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1081                 adev->firmware.fw_size +=
1082                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1083
1084                 if (amdgpu_sriov_vf(adev)) {
1085                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1086                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1087                         info->fw = adev->gfx.mec_fw;
1088                         adev->firmware.fw_size +=
1089                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1090                 }
1091
1092                 if (adev->gfx.mec2_fw) {
1093                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1094                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1095                         info->fw = adev->gfx.mec2_fw;
1096                         header = (const struct common_firmware_header *)info->fw->data;
1097                         adev->firmware.fw_size +=
1098                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1099                 }
1100
1101         }
1102
1103 out:
1104         if (err) {
1105                 dev_err(adev->dev,
1106                         "gfx8: Failed to load firmware \"%s\"\n",
1107                         fw_name);
1108                 release_firmware(adev->gfx.pfp_fw);
1109                 adev->gfx.pfp_fw = NULL;
1110                 release_firmware(adev->gfx.me_fw);
1111                 adev->gfx.me_fw = NULL;
1112                 release_firmware(adev->gfx.ce_fw);
1113                 adev->gfx.ce_fw = NULL;
1114                 release_firmware(adev->gfx.rlc_fw);
1115                 adev->gfx.rlc_fw = NULL;
1116                 release_firmware(adev->gfx.mec_fw);
1117                 adev->gfx.mec_fw = NULL;
1118                 release_firmware(adev->gfx.mec2_fw);
1119                 adev->gfx.mec2_fw = NULL;
1120         }
1121         return err;
1122 }
1123
/* gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer (CSB).
 *
 * Emits, in little-endian dwords into @buffer:
 *   PREAMBLE begin-clear-state, CONTEXT_CONTROL, every SECT_CONTEXT
 *   extent from adev->gfx.rlc.cs_data as SET_CONTEXT_REG packets, the
 *   current raster config pair, PREAMBLE end-clear-state, and a final
 *   CLEAR_STATE packet. The packet order is the hardware contract.
 *
 * The caller must size @buffer via gfx_v8_0_get_csb_size(); a section
 * other than SECT_CONTEXT aborts the build early (buffer left partial).
 * No-op if cs_data or @buffer is NULL.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1170
1171 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1172 {
1173         const __le32 *fw_data;
1174         volatile u32 *dst_ptr;
1175         int me, i, max_me = 4;
1176         u32 bo_offset = 0;
1177         u32 table_offset, table_size;
1178
1179         if (adev->asic_type == CHIP_CARRIZO)
1180                 max_me = 5;
1181
1182         /* write the cp table buffer */
1183         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1184         for (me = 0; me < max_me; me++) {
1185                 if (me == 0) {
1186                         const struct gfx_firmware_header_v1_0 *hdr =
1187                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1188                         fw_data = (const __le32 *)
1189                                 (adev->gfx.ce_fw->data +
1190                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191                         table_offset = le32_to_cpu(hdr->jt_offset);
1192                         table_size = le32_to_cpu(hdr->jt_size);
1193                 } else if (me == 1) {
1194                         const struct gfx_firmware_header_v1_0 *hdr =
1195                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1196                         fw_data = (const __le32 *)
1197                                 (adev->gfx.pfp_fw->data +
1198                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199                         table_offset = le32_to_cpu(hdr->jt_offset);
1200                         table_size = le32_to_cpu(hdr->jt_size);
1201                 } else if (me == 2) {
1202                         const struct gfx_firmware_header_v1_0 *hdr =
1203                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1204                         fw_data = (const __le32 *)
1205                                 (adev->gfx.me_fw->data +
1206                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207                         table_offset = le32_to_cpu(hdr->jt_offset);
1208                         table_size = le32_to_cpu(hdr->jt_size);
1209                 } else if (me == 3) {
1210                         const struct gfx_firmware_header_v1_0 *hdr =
1211                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212                         fw_data = (const __le32 *)
1213                                 (adev->gfx.mec_fw->data +
1214                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215                         table_offset = le32_to_cpu(hdr->jt_offset);
1216                         table_size = le32_to_cpu(hdr->jt_size);
1217                 } else  if (me == 4) {
1218                         const struct gfx_firmware_header_v1_0 *hdr =
1219                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1220                         fw_data = (const __le32 *)
1221                                 (adev->gfx.mec2_fw->data +
1222                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1223                         table_offset = le32_to_cpu(hdr->jt_offset);
1224                         table_size = le32_to_cpu(hdr->jt_size);
1225                 }
1226
1227                 for (i = 0; i < table_size; i ++) {
1228                         dst_ptr[bo_offset + i] =
1229                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1230                 }
1231
1232                 bo_offset += table_size;
1233         }
1234 }
1235
1236 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1237 {
1238         int r;
1239
1240         /* clear state block */
1241         if (adev->gfx.rlc.clear_state_obj) {
1242                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1243                 if (unlikely(r != 0))
1244                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1245                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1246                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1247                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1248                 adev->gfx.rlc.clear_state_obj = NULL;
1249         }
1250
1251         /* jump table block */
1252         if (adev->gfx.rlc.cp_table_obj) {
1253                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1254                 if (unlikely(r != 0))
1255                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1256                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1257                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1258                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1259                 adev->gfx.rlc.cp_table_obj = NULL;
1260         }
1261 }
1262
1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1264 {
1265         volatile u32 *dst_ptr;
1266         u32 dws;
1267         const struct cs_section_def *cs_data;
1268         int r;
1269
1270         adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272         cs_data = adev->gfx.rlc.cs_data;
1273
1274         if (cs_data) {
1275                 /* clear state block */
1276                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1277
1278                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1279                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1280                                              AMDGPU_GEM_DOMAIN_VRAM,
1281                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1282                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1283                                              NULL, NULL,
1284                                              &adev->gfx.rlc.clear_state_obj);
1285                         if (r) {
1286                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1287                                 gfx_v8_0_rlc_fini(adev);
1288                                 return r;
1289                         }
1290                 }
1291                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1292                 if (unlikely(r != 0)) {
1293                         gfx_v8_0_rlc_fini(adev);
1294                         return r;
1295                 }
1296                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1297                                   &adev->gfx.rlc.clear_state_gpu_addr);
1298                 if (r) {
1299                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1300                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304
1305                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1306                 if (r) {
1307                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1308                         gfx_v8_0_rlc_fini(adev);
1309                         return r;
1310                 }
1311                 /* set up the cs buffer */
1312                 dst_ptr = adev->gfx.rlc.cs_ptr;
1313                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1314                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1315                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1316         }
1317
1318         if ((adev->asic_type == CHIP_CARRIZO) ||
1319             (adev->asic_type == CHIP_STONEY)) {
1320                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1321                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1322                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1323                                              AMDGPU_GEM_DOMAIN_VRAM,
1324                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1325                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1326                                              NULL, NULL,
1327                                              &adev->gfx.rlc.cp_table_obj);
1328                         if (r) {
1329                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1330                                 return r;
1331                         }
1332                 }
1333
1334                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1335                 if (unlikely(r != 0)) {
1336                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1340                                   &adev->gfx.rlc.cp_table_gpu_addr);
1341                 if (r) {
1342                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1343                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1344                         return r;
1345                 }
1346                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362 {
1363         int r;
1364
1365         if (adev->gfx.mec.hpd_eop_obj) {
1366                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1367                 if (unlikely(r != 0))
1368                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1369                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1370                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1371                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1372                 adev->gfx.mec.hpd_eop_obj = NULL;
1373         }
1374 }
1375
1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377                                   struct amdgpu_ring *ring,
1378                                   struct amdgpu_irq_src *irq)
1379 {
1380         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1381         int r = 0;
1382
1383         r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1384         if (r)
1385                 return r;
1386
1387         ring->adev = NULL;
1388         ring->ring_obj = NULL;
1389         ring->use_doorbell = true;
1390         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1391         if (adev->gfx.mec2_fw) {
1392                 ring->me = 2;
1393                 ring->pipe = 0;
1394         } else {
1395                 ring->me = 1;
1396                 ring->pipe = 1;
1397         }
1398
1399         ring->queue = 0;
1400         ring->eop_gpu_addr = kiq->eop_gpu_addr;
1401         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1402         r = amdgpu_ring_init(adev, ring, 1024,
1403                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1404         if (r)
1405                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1406
1407         return r;
1408 }
/*
 * Tear down a KIQ ring set up by gfx_v8_0_kiq_init_ring(): release the
 * writeback slot used for register reads through the KIQ, then free the
 * ring itself.  The irq parameter is unused here.
 */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
                                   struct amdgpu_irq_src *irq)
{
        amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
        amdgpu_ring_fini(ring);
}
1415
1416 #define MEC_HPD_SIZE 2048
1417
1418 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1419 {
1420         int r;
1421         u32 *hpd;
1422
1423         /*
1424          * we assign only 1 pipe because all other pipes will
1425          * be handled by KFD
1426          */
1427         adev->gfx.mec.num_mec = 1;
1428         adev->gfx.mec.num_pipe = 1;
1429         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1430
1431         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1432                 r = amdgpu_bo_create(adev,
1433                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1434                                      PAGE_SIZE, true,
1435                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1436                                      &adev->gfx.mec.hpd_eop_obj);
1437                 if (r) {
1438                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1439                         return r;
1440                 }
1441         }
1442
1443         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1444         if (unlikely(r != 0)) {
1445                 gfx_v8_0_mec_fini(adev);
1446                 return r;
1447         }
1448         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1449                           &adev->gfx.mec.hpd_eop_gpu_addr);
1450         if (r) {
1451                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1452                 gfx_v8_0_mec_fini(adev);
1453                 return r;
1454         }
1455         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1456         if (r) {
1457                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1458                 gfx_v8_0_mec_fini(adev);
1459                 return r;
1460         }
1461
1462         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1463
1464         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1465         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1466
1467         return 0;
1468 }
1469
/* Free the KIQ EOP buffer allocated by gfx_v8_0_kiq_init(). */
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

        amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}
1476
1477 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1478 {
1479         int r;
1480         u32 *hpd;
1481         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1482
1483         r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1484                                     AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1485                                     &kiq->eop_gpu_addr, (void **)&hpd);
1486         if (r) {
1487                 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1488                 return r;
1489         }
1490
1491         memset(hpd, 0, MEC_HPD_SIZE);
1492
1493         r = amdgpu_bo_reserve(kiq->eop_obj, false);
1494         if (unlikely(r != 0))
1495                 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
1496         amdgpu_bo_kunmap(kiq->eop_obj);
1497         amdgpu_bo_unreserve(kiq->eop_obj);
1498
1499         return 0;
1500 }
1501
/*
 * Raw GFX8 shader dwords copied into the indirect buffer and dispatched
 * by gfx_v8_0_do_edc_gpr_workarounds().  Per its name it initializes the
 * VGPR file as part of the Carrizo EDC workaround (exact instruction
 * semantics come from the GFX8 ISA; not decoded here).
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1538
/*
 * Raw GFX8 shader dwords copied into the indirect buffer and dispatched
 * (twice, with different static thread management masks) by
 * gfx_v8_0_do_edc_gpr_workarounds().  Per its name it initializes the
 * SGPR file as part of the Carrizo EDC workaround.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1563
/*
 * (register, value) pairs emitted as SET_SH_REG packets before the VGPR
 * init shader dispatch in gfx_v8_0_do_edc_gpr_workarounds().  Consumed
 * two entries at a time (even index = register, odd index = value).
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1583
/*
 * (register, value) pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs only
 * in the STATIC_THREAD_MGMT_SE0 mask (0x0f here vs 0xf0), so the two
 * dispatches together cover both halves of the CU mask.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1603
/*
 * (register, value) pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Identical to sgpr1_init_regs
 * except for the complementary STATIC_THREAD_MGMT_SE0 mask (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1623
/*
 * EDC SEC/DED error counter registers.  Read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() purely to clear the counters
 * (the read values are discarded).
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1652
1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1654 {
1655         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656         struct amdgpu_ib ib;
1657         struct dma_fence *f = NULL;
1658         int r, i;
1659         u32 tmp;
1660         unsigned total_size, vgpr_offset, sgpr_offset;
1661         u64 gpu_addr;
1662
1663         /* only supported on CZ */
1664         if (adev->asic_type != CHIP_CARRIZO)
1665                 return 0;
1666
1667         /* bail if the compute ring is not ready */
1668         if (!ring->ready)
1669                 return 0;
1670
1671         tmp = RREG32(mmGB_EDC_MODE);
1672         WREG32(mmGB_EDC_MODE, 0);
1673
1674         total_size =
1675                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676         total_size +=
1677                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678         total_size +=
1679                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680         total_size = ALIGN(total_size, 256);
1681         vgpr_offset = total_size;
1682         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683         sgpr_offset = total_size;
1684         total_size += sizeof(sgpr_init_compute_shader);
1685
1686         /* allocate an indirect buffer to put the commands in */
1687         memset(&ib, 0, sizeof(ib));
1688         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1689         if (r) {
1690                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1691                 return r;
1692         }
1693
1694         /* load the compute shaders */
1695         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1697
1698         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1700
1701         /* init the ib length to 0 */
1702         ib.length_dw = 0;
1703
1704         /* VGPR */
1705         /* write the register state for the compute dispatch */
1706         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1710         }
1711         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1712         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1717
1718         /* write dispatch packet */
1719         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720         ib.ptr[ib.length_dw++] = 8; /* x */
1721         ib.ptr[ib.length_dw++] = 1; /* y */
1722         ib.ptr[ib.length_dw++] = 1; /* z */
1723         ib.ptr[ib.length_dw++] =
1724                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1725
1726         /* write CS partial flush packet */
1727         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1729
1730         /* SGPR1 */
1731         /* write the register state for the compute dispatch */
1732         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1736         }
1737         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1743
1744         /* write dispatch packet */
1745         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746         ib.ptr[ib.length_dw++] = 8; /* x */
1747         ib.ptr[ib.length_dw++] = 1; /* y */
1748         ib.ptr[ib.length_dw++] = 1; /* z */
1749         ib.ptr[ib.length_dw++] =
1750                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1751
1752         /* write CS partial flush packet */
1753         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1755
1756         /* SGPR2 */
1757         /* write the register state for the compute dispatch */
1758         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1762         }
1763         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1769
1770         /* write dispatch packet */
1771         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772         ib.ptr[ib.length_dw++] = 8; /* x */
1773         ib.ptr[ib.length_dw++] = 1; /* y */
1774         ib.ptr[ib.length_dw++] = 1; /* z */
1775         ib.ptr[ib.length_dw++] =
1776                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1777
1778         /* write CS partial flush packet */
1779         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1781
1782         /* shedule the ib on the ring */
1783         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1784         if (r) {
1785                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1786                 goto fail;
1787         }
1788
1789         /* wait for the GPU to finish processing the IB */
1790         r = dma_fence_wait(f, false);
1791         if (r) {
1792                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1793                 goto fail;
1794         }
1795
1796         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798         WREG32(mmGB_EDC_MODE, tmp);
1799
1800         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1803
1804
1805         /* read back registers to clear the counters */
1806         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807                 RREG32(sec_ded_counter_registers[i]);
1808
1809 fail:
1810         amdgpu_ib_free(adev, &ib, NULL);
1811         dma_fence_put(f);
1812
1813         return r;
1814 }
1815
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         u32 mc_shared_chmap, mc_arb_ramcfg;
1820         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821         u32 tmp;
1822         int ret;
1823
1824         switch (adev->asic_type) {
1825         case CHIP_TOPAZ:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_cu_per_sh = 6;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831                 adev->gfx.config.max_texture_channel_caches = 2;
1832                 adev->gfx.config.max_gprs = 256;
1833                 adev->gfx.config.max_gs_threads = 32;
1834                 adev->gfx.config.max_hw_contexts = 8;
1835
1836                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841                 break;
1842         case CHIP_FIJI:
1843                 adev->gfx.config.max_shader_engines = 4;
1844                 adev->gfx.config.max_tile_pipes = 16;
1845                 adev->gfx.config.max_cu_per_sh = 16;
1846                 adev->gfx.config.max_sh_per_se = 1;
1847                 adev->gfx.config.max_backends_per_se = 4;
1848                 adev->gfx.config.max_texture_channel_caches = 16;
1849                 adev->gfx.config.max_gprs = 256;
1850                 adev->gfx.config.max_gs_threads = 32;
1851                 adev->gfx.config.max_hw_contexts = 8;
1852
1853                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858                 break;
1859         case CHIP_POLARIS11:
1860         case CHIP_POLARIS12:
1861                 ret = amdgpu_atombios_get_gfx_info(adev);
1862                 if (ret)
1863                         return ret;
1864                 adev->gfx.config.max_gprs = 256;
1865                 adev->gfx.config.max_gs_threads = 32;
1866                 adev->gfx.config.max_hw_contexts = 8;
1867
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873                 break;
1874         case CHIP_POLARIS10:
1875                 ret = amdgpu_atombios_get_gfx_info(adev);
1876                 if (ret)
1877                         return ret;
1878                 adev->gfx.config.max_gprs = 256;
1879                 adev->gfx.config.max_gs_threads = 32;
1880                 adev->gfx.config.max_hw_contexts = 8;
1881
1882                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887                 break;
1888         case CHIP_TONGA:
1889                 adev->gfx.config.max_shader_engines = 4;
1890                 adev->gfx.config.max_tile_pipes = 8;
1891                 adev->gfx.config.max_cu_per_sh = 8;
1892                 adev->gfx.config.max_sh_per_se = 1;
1893                 adev->gfx.config.max_backends_per_se = 2;
1894                 adev->gfx.config.max_texture_channel_caches = 8;
1895                 adev->gfx.config.max_gprs = 256;
1896                 adev->gfx.config.max_gs_threads = 32;
1897                 adev->gfx.config.max_hw_contexts = 8;
1898
1899                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904                 break;
1905         case CHIP_CARRIZO:
1906                 adev->gfx.config.max_shader_engines = 1;
1907                 adev->gfx.config.max_tile_pipes = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910
1911                 switch (adev->pdev->revision) {
1912                 case 0xc4:
1913                 case 0x84:
1914                 case 0xc8:
1915                 case 0xcc:
1916                 case 0xe1:
1917                 case 0xe3:
1918                         /* B10 */
1919                         adev->gfx.config.max_cu_per_sh = 8;
1920                         break;
1921                 case 0xc5:
1922                 case 0x81:
1923                 case 0x85:
1924                 case 0xc9:
1925                 case 0xcd:
1926                 case 0xe2:
1927                 case 0xe4:
1928                         /* B8 */
1929                         adev->gfx.config.max_cu_per_sh = 6;
1930                         break;
1931                 case 0xc6:
1932                 case 0xca:
1933                 case 0xce:
1934                 case 0x88:
1935                         /* B6 */
1936                         adev->gfx.config.max_cu_per_sh = 6;
1937                         break;
1938                 case 0xc7:
1939                 case 0x87:
1940                 case 0xcb:
1941                 case 0xe5:
1942                 case 0x89:
1943                 default:
1944                         /* B4 */
1945                         adev->gfx.config.max_cu_per_sh = 4;
1946                         break;
1947                 }
1948
1949                 adev->gfx.config.max_texture_channel_caches = 2;
1950                 adev->gfx.config.max_gprs = 256;
1951                 adev->gfx.config.max_gs_threads = 32;
1952                 adev->gfx.config.max_hw_contexts = 8;
1953
1954                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1959                 break;
1960         case CHIP_STONEY:
1961                 adev->gfx.config.max_shader_engines = 1;
1962                 adev->gfx.config.max_tile_pipes = 2;
1963                 adev->gfx.config.max_sh_per_se = 1;
1964                 adev->gfx.config.max_backends_per_se = 1;
1965
1966                 switch (adev->pdev->revision) {
1967                 case 0xc0:
1968                 case 0xc1:
1969                 case 0xc2:
1970                 case 0xc4:
1971                 case 0xc8:
1972                 case 0xc9:
1973                         adev->gfx.config.max_cu_per_sh = 3;
1974                         break;
1975                 case 0xd0:
1976                 case 0xd1:
1977                 case 0xd2:
1978                 default:
1979                         adev->gfx.config.max_cu_per_sh = 2;
1980                         break;
1981                 }
1982
1983                 adev->gfx.config.max_texture_channel_caches = 2;
1984                 adev->gfx.config.max_gprs = 256;
1985                 adev->gfx.config.max_gs_threads = 16;
1986                 adev->gfx.config.max_hw_contexts = 8;
1987
1988                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1993                 break;
1994         default:
1995                 adev->gfx.config.max_shader_engines = 2;
1996                 adev->gfx.config.max_tile_pipes = 4;
1997                 adev->gfx.config.max_cu_per_sh = 2;
1998                 adev->gfx.config.max_sh_per_se = 1;
1999                 adev->gfx.config.max_backends_per_se = 2;
2000                 adev->gfx.config.max_texture_channel_caches = 4;
2001                 adev->gfx.config.max_gprs = 256;
2002                 adev->gfx.config.max_gs_threads = 32;
2003                 adev->gfx.config.max_hw_contexts = 8;
2004
2005                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2010                 break;
2011         }
2012
2013         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2016
2017         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018         adev->gfx.config.mem_max_burst_length_bytes = 256;
2019         if (adev->flags & AMD_IS_APU) {
2020                 /* Get memory bank mapping mode. */
2021                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2024
2025                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2028
2029                 /* Validate settings in case only one DIMM installed. */
2030                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031                         dimm00_addr_map = 0;
2032                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033                         dimm01_addr_map = 0;
2034                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035                         dimm10_addr_map = 0;
2036                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037                         dimm11_addr_map = 0;
2038
2039                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2040                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
2041                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042                         adev->gfx.config.mem_row_size_in_kb = 2;
2043                 else
2044                         adev->gfx.config.mem_row_size_in_kb = 1;
2045         } else {
2046                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2049                         adev->gfx.config.mem_row_size_in_kb = 4;
2050         }
2051
2052         adev->gfx.config.shader_engine_tile_size = 32;
2053         adev->gfx.config.num_gpus = 1;
2054         adev->gfx.config.multi_gpu_tile_size = 64;
2055
2056         /* fix up row size */
2057         switch (adev->gfx.config.mem_row_size_in_kb) {
2058         case 1:
2059         default:
2060                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2061                 break;
2062         case 2:
2063                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2064                 break;
2065         case 4:
2066                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2067                 break;
2068         }
2069         adev->gfx.config.gb_addr_config = gb_addr_config;
2070
2071         return 0;
2072 }
2073
/*
 * gfx_v8_0_sw_init - software-side initialization of the GFX v8 IP block.
 *
 * Registers the interrupt sources the block consumes, loads the gfx
 * microcode, allocates the RLC and MEC buffer objects, initializes the
 * gfx and compute rings, sets up KIQ/MQD state when running as an
 * SR-IOV VF, and reserves the GDS/GWS/OA partitions for gfx use.
 * Torn down by gfx_v8_0_sw_fini().
 *
 * @handle: opaque IP-block handle; actually the struct amdgpu_device.
 *
 * Returns 0 on success or the negative error code of the first step
 * that failed.  NOTE(review): on failure, resources set up by earlier
 * steps are not freed here — presumably the caller's teardown path
 * handles partial init; confirm against the IP-block framework.
 */
static int gfx_v8_0_sw_init(void *handle)
{
        int i, r;
        struct amdgpu_ring *ring;
        struct amdgpu_kiq *kiq;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        /* KIQ event (interrupt source id 178) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
        if (r)
                return r;

        /* EOP Event (end-of-pipe, source id 181) — shared by gfx and compute rings below */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
        if (r)
                return r;

        /* Privileged reg access fault (source id 184) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
                              &adev->gfx.priv_reg_irq);
        if (r)
                return r;

        /* Privileged instruction fault (source id 185) */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
                              &adev->gfx.priv_inst_irq);
        if (r)
                return r;

        adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

        gfx_v8_0_scratch_init(adev);

        /* Fetch CP/RLC/MEC firmware images; everything below depends on them. */
        r = gfx_v8_0_init_microcode(adev);
        if (r) {
                DRM_ERROR("Failed to load gfx firmware!\n");
                return r;
        }

        r = gfx_v8_0_rlc_init(adev);
        if (r) {
                DRM_ERROR("Failed to init rlc BOs!\n");
                return r;
        }

        /* MEC init must precede the compute-ring loop: it provides hpd_eop_gpu_addr. */
        r = gfx_v8_0_mec_init(adev);
        if (r) {
                DRM_ERROR("Failed to init MEC BOs!\n");
                return r;
        }

        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
                sprintf(ring->name, "gfx");
                /* no gfx doorbells on iceland */
                if (adev->asic_type != CHIP_TOPAZ) {
                        ring->use_doorbell = true;
                        ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
                }

                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     AMDGPU_CP_IRQ_GFX_EOP);
                if (r)
                        return r;
        }

        /* set up the compute queues */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                unsigned irq_type;

                /* max 32 queues per MEC */
                if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
                        DRM_ERROR("Too many (%d) compute rings!\n", i);
                        break;
                }
                ring = &adev->gfx.compute_ring[i];
                ring->ring_obj = NULL;
                ring->use_doorbell = true;
                ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
                ring->me = 1; /* first MEC */
                /* 8 queues per pipe: linear ring index i maps to (pipe, queue). */
                ring->pipe = i / 8;
                ring->queue = i % 8;
                /* Each queue gets its own MEC_HPD_SIZE slice of the shared HPD EOP buffer. */
                ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
                sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
                /* EOP interrupts are routed per pipe, not per queue. */
                irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
                /* type-2 packets are deprecated on MEC, use type-3 instead */
                r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                                     irq_type);
                if (r)
                        return r;
        }

        /* KIQ (kernel interface queue) is only used when running as an SR-IOV VF. */
        if (amdgpu_sriov_vf(adev)) {
                r = gfx_v8_0_kiq_init(adev);
                if (r) {
                        DRM_ERROR("Failed to init KIQ BOs!\n");
                        return r;
                }

                kiq = &adev->gfx.kiq;
                r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
                if (r)
                        return r;

                /* create MQD for all compute queues as well as KIQ for SRIOV case */
                r = gfx_v8_0_compute_mqd_sw_init(adev);
                if (r)
                        return r;
        }

        /* reserve GDS, GWS and OA resource for gfx */
        r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
                                    &adev->gds.gds_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
                                    &adev->gds.gws_gfx_bo, NULL, NULL);
        if (r)
                return r;

        r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
                                    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
                                    &adev->gds.oa_gfx_bo, NULL, NULL);
        if (r)
                return r;

        /* Constant-engine RAM size (32 KiB). */
        adev->gfx.ce_ram_size = 0x8000;

        r = gfx_v8_0_gpu_early_init(adev);
        if (r)
                return r;

        return 0;
}
2213
2214 static int gfx_v8_0_sw_fini(void *handle)
2215 {
2216         int i;
2217         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2218
2219         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2220         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2221         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2222
2223         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2224                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2225         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2226                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2227
2228         if (amdgpu_sriov_vf(adev)) {
2229                 gfx_v8_0_compute_mqd_sw_fini(adev);
2230                 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2231                 gfx_v8_0_kiq_fini(adev);
2232         }
2233
2234         gfx_v8_0_mec_fini(adev);
2235         gfx_v8_0_rlc_fini(adev);
2236         gfx_v8_0_free_microcode(adev);
2237
2238         return 0;
2239 }
2240
2241 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2242 {
2243         uint32_t *modearray, *mod2array;
2244         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2245         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2246         u32 reg_offset;
2247
2248         modearray = adev->gfx.config.tile_mode_array;
2249         mod2array = adev->gfx.config.macrotile_mode_array;
2250
2251         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2252                 modearray[reg_offset] = 0;
2253
2254         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2255                 mod2array[reg_offset] = 0;
2256
2257         switch (adev->asic_type) {
2258         case CHIP_TOPAZ:
2259                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                 PIPE_CONFIG(ADDR_SURF_P2) |
2261                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2262                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2263                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                 PIPE_CONFIG(ADDR_SURF_P2) |
2265                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2266                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2267                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268                                 PIPE_CONFIG(ADDR_SURF_P2) |
2269                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2270                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2271                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272                                 PIPE_CONFIG(ADDR_SURF_P2) |
2273                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276                                 PIPE_CONFIG(ADDR_SURF_P2) |
2277                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2278                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2280                                 PIPE_CONFIG(ADDR_SURF_P2) |
2281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284                                 PIPE_CONFIG(ADDR_SURF_P2) |
2285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2288                                 PIPE_CONFIG(ADDR_SURF_P2));
2289                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2290                                 PIPE_CONFIG(ADDR_SURF_P2) |
2291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2292                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2293                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2297                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2301                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2310                                  PIPE_CONFIG(ADDR_SURF_P2) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2314                                  PIPE_CONFIG(ADDR_SURF_P2) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2317                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2318                                  PIPE_CONFIG(ADDR_SURF_P2) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2321                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2322                                  PIPE_CONFIG(ADDR_SURF_P2) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2325                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2326                                  PIPE_CONFIG(ADDR_SURF_P2) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2329                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2330                                  PIPE_CONFIG(ADDR_SURF_P2) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2334                                  PIPE_CONFIG(ADDR_SURF_P2) |
2335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2338                                  PIPE_CONFIG(ADDR_SURF_P2) |
2339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2342                                  PIPE_CONFIG(ADDR_SURF_P2) |
2343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2346                                  PIPE_CONFIG(ADDR_SURF_P2) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P2) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P2) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P2) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361
2362                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2363                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                 NUM_BANKS(ADDR_SURF_8_BANK));
2366                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2367                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369                                 NUM_BANKS(ADDR_SURF_8_BANK));
2370                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2371                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2372                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373                                 NUM_BANKS(ADDR_SURF_8_BANK));
2374                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2376                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377                                 NUM_BANKS(ADDR_SURF_8_BANK));
2378                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                 NUM_BANKS(ADDR_SURF_8_BANK));
2382                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                 NUM_BANKS(ADDR_SURF_8_BANK));
2386                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389                                 NUM_BANKS(ADDR_SURF_8_BANK));
2390                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2393                                 NUM_BANKS(ADDR_SURF_16_BANK));
2394                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2397                                 NUM_BANKS(ADDR_SURF_16_BANK));
2398                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2399                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2401                                  NUM_BANKS(ADDR_SURF_16_BANK));
2402                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2403                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2404                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405                                  NUM_BANKS(ADDR_SURF_16_BANK));
2406                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2408                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409                                  NUM_BANKS(ADDR_SURF_16_BANK));
2410                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413                                  NUM_BANKS(ADDR_SURF_16_BANK));
2414                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417                                  NUM_BANKS(ADDR_SURF_8_BANK));
2418
2419                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2420                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2421                             reg_offset != 23)
2422                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2423
2424                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2425                         if (reg_offset != 7)
2426                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2427
2428                 break;
2429         case CHIP_FIJI:
2430                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2449                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2453                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2463                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2464                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2467                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2476                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2484                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2489                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2501                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2505                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2517                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2521                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2525                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2529                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2533                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2538                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2542                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2543                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2546                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2548                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2552
2553                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2556                                 NUM_BANKS(ADDR_SURF_8_BANK));
2557                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2560                                 NUM_BANKS(ADDR_SURF_8_BANK));
2561                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2564                                 NUM_BANKS(ADDR_SURF_8_BANK));
2565                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568                                 NUM_BANKS(ADDR_SURF_8_BANK));
2569                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                 NUM_BANKS(ADDR_SURF_8_BANK));
2573                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2575                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576                                 NUM_BANKS(ADDR_SURF_8_BANK));
2577                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580                                 NUM_BANKS(ADDR_SURF_8_BANK));
2581                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2583                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584                                 NUM_BANKS(ADDR_SURF_8_BANK));
2585                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588                                 NUM_BANKS(ADDR_SURF_8_BANK));
2589                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592                                  NUM_BANKS(ADDR_SURF_8_BANK));
2593                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2596                                  NUM_BANKS(ADDR_SURF_8_BANK));
2597                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2600                                  NUM_BANKS(ADDR_SURF_8_BANK));
2601                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2604                                  NUM_BANKS(ADDR_SURF_8_BANK));
2605                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2608                                  NUM_BANKS(ADDR_SURF_4_BANK));
2609
2610                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2611                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2612
2613                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2614                         if (reg_offset != 7)
2615                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2616
2617                 break;
2618         case CHIP_TONGA:
2619                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2644                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2652                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2653                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2660                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2664                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2665                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2668                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2678                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2688                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2690                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2694                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2695                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2700                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2702                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2706                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2710                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2712                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2714                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2718                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2722                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2727                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2735                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2737                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2738                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2741
2742                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                 NUM_BANKS(ADDR_SURF_16_BANK));
2754                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757                                 NUM_BANKS(ADDR_SURF_16_BANK));
2758                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2760                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2761                                 NUM_BANKS(ADDR_SURF_16_BANK));
2762                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                 NUM_BANKS(ADDR_SURF_16_BANK));
2766                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2769                                 NUM_BANKS(ADDR_SURF_16_BANK));
2770                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2772                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773                                 NUM_BANKS(ADDR_SURF_16_BANK));
2774                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                 NUM_BANKS(ADDR_SURF_16_BANK));
2778                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2780                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2781                                  NUM_BANKS(ADDR_SURF_16_BANK));
2782                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2785                                  NUM_BANKS(ADDR_SURF_16_BANK));
2786                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2789                                  NUM_BANKS(ADDR_SURF_8_BANK));
2790                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2793                                  NUM_BANKS(ADDR_SURF_4_BANK));
2794                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2797                                  NUM_BANKS(ADDR_SURF_4_BANK));
2798
2799                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2800                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2801
2802                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2803                         if (reg_offset != 7)
2804                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2805
2806                 break;
2807         case CHIP_POLARIS11:
2808         case CHIP_POLARIS12:
2809                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2833                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2837                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2841                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2842                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2843                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2855                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2896                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2904                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2912                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2918                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2922                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2926                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2930                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2931
2932                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2934                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                 NUM_BANKS(ADDR_SURF_16_BANK));
2936
2937                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2938                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2939                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2940                                 NUM_BANKS(ADDR_SURF_16_BANK));
2941
2942                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2944                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945                                 NUM_BANKS(ADDR_SURF_16_BANK));
2946
2947                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2948                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2949                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950                                 NUM_BANKS(ADDR_SURF_16_BANK));
2951
2952                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2953                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2954                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2955                                 NUM_BANKS(ADDR_SURF_16_BANK));
2956
2957                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2958                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2959                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2960                                 NUM_BANKS(ADDR_SURF_16_BANK));
2961
2962                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2963                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2964                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2965                                 NUM_BANKS(ADDR_SURF_16_BANK));
2966
2967                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2968                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2969                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970                                 NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980                                 NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2984                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985                                 NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2995                                 NUM_BANKS(ADDR_SURF_8_BANK));
2996
2997                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3000                                 NUM_BANKS(ADDR_SURF_4_BANK));
3001
3002                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3003                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3004
3005                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3006                         if (reg_offset != 7)
3007                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3008
3009                 break;
3010         case CHIP_POLARIS10:
                /*
                 * Polaris10 tiling tables.  modearray[] holds the 31
                 * GB_TILE_MODE register values (array mode, pipe config,
                 * tile split, new micro-tile mode, sample split);
                 * mod2array[] holds the GB_MACROTILE_MODE values (bank
                 * width/height, macro-tile aspect, bank count).  Pipe
                 * config is ADDR_SURF_P8_32x32_16x16 except for the PRT
                 * entries that use ADDR_SURF_P4_16x16.
                 */
3011                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3030                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3034                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3041                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3045                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3057                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3065                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3086                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3102                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3106                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3114                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3120                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3121                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3124                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3128                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3129                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3131                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3133
                /* Macrotile (bank) parameters; index 7 is never assigned
                 * and is skipped by the write loop below -- presumably a
                 * reserved entry, TODO confirm against the register spec. */
3134                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3136                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137                                 NUM_BANKS(ADDR_SURF_16_BANK));
3138
3139                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3140                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3141                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3142                                 NUM_BANKS(ADDR_SURF_16_BANK));
3143
3144                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147                                 NUM_BANKS(ADDR_SURF_16_BANK));
3148
3149                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152                                 NUM_BANKS(ADDR_SURF_16_BANK));
3153
3154                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3156                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157                                 NUM_BANKS(ADDR_SURF_16_BANK));
3158
3159                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                 NUM_BANKS(ADDR_SURF_16_BANK));
3163
3164                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3167                                 NUM_BANKS(ADDR_SURF_16_BANK));
3168
3169                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3172                                 NUM_BANKS(ADDR_SURF_16_BANK));
3173
3174                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3176                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177                                 NUM_BANKS(ADDR_SURF_16_BANK));
3178
3179                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3181                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182                                 NUM_BANKS(ADDR_SURF_16_BANK));
3183
3184                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_16_BANK));
3188
3189                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3192                                 NUM_BANKS(ADDR_SURF_8_BANK));
3193
3194                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3197                                 NUM_BANKS(ADDR_SURF_4_BANK));
3198
3199                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3202                                 NUM_BANKS(ADDR_SURF_4_BANK));
3203
                /* Flush both tables to the hardware registers.
                 * GB_MACROTILE_MODE index 7 is intentionally not written
                 * (matching the unassigned mod2array[7] above). */
3204                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3205                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3206
3207                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3208                         if (reg_offset != 7)
3209                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3210
3211                 break;
3212         case CHIP_STONEY:
                /*
                 * Stoney tiling tables.  Pipe config is ADDR_SURF_P2
                 * (2 pipes) throughout.  modearray[] entries 7, 12, 17
                 * and 23 are never assigned and the GB_TILE_MODE write
                 * loop below skips exactly those indices; mod2array[7]
                 * is likewise unassigned and skipped.
                 */
3213                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214                                 PIPE_CONFIG(ADDR_SURF_P2) |
3215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3218                                 PIPE_CONFIG(ADDR_SURF_P2) |
3219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3222                                 PIPE_CONFIG(ADDR_SURF_P2) |
3223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2) |
3227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230                                 PIPE_CONFIG(ADDR_SURF_P2) |
3231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3234                                 PIPE_CONFIG(ADDR_SURF_P2) |
3235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                 PIPE_CONFIG(ADDR_SURF_P2) |
3239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3242                                 PIPE_CONFIG(ADDR_SURF_P2));
3243                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244                                 PIPE_CONFIG(ADDR_SURF_P2) |
3245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3246                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3307                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3315
                /* Macrotile (bank) parameters; index 7 unassigned and
                 * skipped by the write loop below. */
3316                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                 NUM_BANKS(ADDR_SURF_8_BANK));
3320                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323                                 NUM_BANKS(ADDR_SURF_8_BANK));
3324                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3331                                 NUM_BANKS(ADDR_SURF_8_BANK));
3332                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3335                                 NUM_BANKS(ADDR_SURF_8_BANK));
3336                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339                                 NUM_BANKS(ADDR_SURF_8_BANK));
3340                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                 NUM_BANKS(ADDR_SURF_8_BANK));
3344                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                 NUM_BANKS(ADDR_SURF_16_BANK));
3348                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                 NUM_BANKS(ADDR_SURF_16_BANK));
3352                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3353                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3357                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3358                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359                                  NUM_BANKS(ADDR_SURF_16_BANK));
3360                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363                                  NUM_BANKS(ADDR_SURF_16_BANK));
3364                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3371                                  NUM_BANKS(ADDR_SURF_8_BANK));
3372
                /* Flush both tables; skip the tile mode indices that were
                 * never assigned above (7, 12, 17, 23) and macrotile
                 * index 7. */
3373                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3374                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3375                             reg_offset != 23)
3376                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3377
3378                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3379                         if (reg_offset != 7)
3380                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3381
3382                 break;
3383         default:
3384                 dev_warn(adev->dev,
3385                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3386                          adev->asic_type);
3387
                /* fall through - unknown ASICs deliberately reuse the
                 * CHIP_CARRIZO tables, as the warning above states */
3388         case CHIP_CARRIZO:
3389                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390                                 PIPE_CONFIG(ADDR_SURF_P2) |
3391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3394                                 PIPE_CONFIG(ADDR_SURF_P2) |
3395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3398                                 PIPE_CONFIG(ADDR_SURF_P2) |
3399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2) |
3403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406                                 PIPE_CONFIG(ADDR_SURF_P2) |
3407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3410                                 PIPE_CONFIG(ADDR_SURF_P2) |
3411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414                                 PIPE_CONFIG(ADDR_SURF_P2) |
3415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3418                                 PIPE_CONFIG(ADDR_SURF_P2));
3419                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3420                                 PIPE_CONFIG(ADDR_SURF_P2) |
3421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3422                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3439                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3440                                  PIPE_CONFIG(ADDR_SURF_P2) |
3441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3444                                  PIPE_CONFIG(ADDR_SURF_P2) |
3445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3447                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3448                                  PIPE_CONFIG(ADDR_SURF_P2) |
3449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3452                                  PIPE_CONFIG(ADDR_SURF_P2) |
3453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3456                                  PIPE_CONFIG(ADDR_SURF_P2) |
3457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3460                                  PIPE_CONFIG(ADDR_SURF_P2) |
3461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3464                                  PIPE_CONFIG(ADDR_SURF_P2) |
3465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3468                                  PIPE_CONFIG(ADDR_SURF_P2) |
3469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3472                                  PIPE_CONFIG(ADDR_SURF_P2) |
3473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3476                                  PIPE_CONFIG(ADDR_SURF_P2) |
3477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3480                                  PIPE_CONFIG(ADDR_SURF_P2) |
3481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3483                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3484                                  PIPE_CONFIG(ADDR_SURF_P2) |
3485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3487                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3488                                  PIPE_CONFIG(ADDR_SURF_P2) |
3489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3491
3492                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3495                                 NUM_BANKS(ADDR_SURF_8_BANK));
3496                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3499                                 NUM_BANKS(ADDR_SURF_8_BANK));
3500                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503                                 NUM_BANKS(ADDR_SURF_8_BANK));
3504                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3507                                 NUM_BANKS(ADDR_SURF_8_BANK));
3508                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3511                                 NUM_BANKS(ADDR_SURF_8_BANK));
3512                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515                                 NUM_BANKS(ADDR_SURF_8_BANK));
3516                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519                                 NUM_BANKS(ADDR_SURF_8_BANK));
3520                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523                                 NUM_BANKS(ADDR_SURF_16_BANK));
3524                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527                                 NUM_BANKS(ADDR_SURF_16_BANK));
3528                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3531                                  NUM_BANKS(ADDR_SURF_16_BANK));
3532                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3533                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3534                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535                                  NUM_BANKS(ADDR_SURF_16_BANK));
3536                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3537                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3538                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539                                  NUM_BANKS(ADDR_SURF_16_BANK));
3540                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3541                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3542                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543                                  NUM_BANKS(ADDR_SURF_16_BANK));
3544                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3545                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3546                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3547                                  NUM_BANKS(ADDR_SURF_8_BANK));
3548
3549                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3550                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3551                             reg_offset != 23)
3552                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3553
3554                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3555                         if (reg_offset != 7)
3556                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3557
3558                 break;
3559         }
3560 }
3561
3562 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3563                                   u32 se_num, u32 sh_num, u32 instance)
3564 {
3565         u32 data;
3566
3567         if (instance == 0xffffffff)
3568                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3569         else
3570                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3571
3572         if (se_num == 0xffffffff)
3573                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3574         else
3575                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3576
3577         if (sh_num == 0xffffffff)
3578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3579         else
3580                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3581
3582         WREG32(mmGRBM_GFX_INDEX, data);
3583 }
3584
3585 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3586 {
3587         return (u32)((1ULL << bit_width) - 1);
3588 }
3589
3590 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3591 {
3592         u32 data, mask;
3593
3594         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3595                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3596
3597         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3598
3599         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3600                                        adev->gfx.config.max_sh_per_se);
3601
3602         return (~data) & mask;
3603 }
3604
3605 static void
3606 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3607 {
3608         switch (adev->asic_type) {
3609         case CHIP_FIJI:
3610                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3611                           RB_XSEL2(1) | PKR_MAP(2) |
3612                           PKR_XSEL(1) | PKR_YSEL(1) |
3613                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3614                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3615                            SE_PAIR_YSEL(2);
3616                 break;
3617         case CHIP_TONGA:
3618         case CHIP_POLARIS10:
3619                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3620                           SE_XSEL(1) | SE_YSEL(1);
3621                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3622                            SE_PAIR_YSEL(2);
3623                 break;
3624         case CHIP_TOPAZ:
3625         case CHIP_CARRIZO:
3626                 *rconf |= RB_MAP_PKR0(2);
3627                 *rconf1 |= 0x0;
3628                 break;
3629         case CHIP_POLARIS11:
3630         case CHIP_POLARIS12:
3631                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3632                           SE_XSEL(1) | SE_YSEL(1);
3633                 *rconf1 |= 0x0;
3634                 break;
3635         case CHIP_STONEY:
3636                 *rconf |= 0x0;
3637                 *rconf1 |= 0x0;
3638                 break;
3639         default:
3640                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3641                 break;
3642         }
3643 }
3644
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 * when some render backends (RBs) are harvested
 *
 * @adev: amdgpu_device pointer
 * @raster_config: baseline PA_SC_RASTER_CONFIG value (fully-enabled layout)
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of the RBs that are actually enabled on this die
 * @num_rb: number of RB slots implied by the config maximums
 *
 * Rewrites the SE/packer/RB mapping fields so rasterization is steered only
 * at backends that exist, then writes the result per shader engine.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one sub-mask per shader engine (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/*
	 * If an entire SE pair is harvested, point SE_PAIR_MAP at the pair
	 * that still has active RBs.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this engine belongs to (0 or 2). */
		int idx = (se / 2) * 2;

		/* One SE of this pair fully harvested: steer SE_MAP at the
		 * surviving one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same idea one level down: pick the packer that still has RBs. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Remap the RB pair inside packer 0 when one RB of the
			 * pair is harvested. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* ... and likewise for packer 1. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3753
/*
 * gfx_v8_0_setup_rb - discover the active render backends and program the
 * raster configuration accordingly
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to build the active-RB bitmap, then either writes the
 * default raster config (nothing harvested, or everything disabled) or the
 * harvest-aware per-SE config.  Finally caches the resulting registers per
 * SE/SH for userspace queries.  Caller-visible state: updates
 * adev->gfx.config.{backend_enable_mask,num_rbs,rb_config}.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* grbm_idx_mutex serializes GRBM_GFX_INDEX-relative accesses. */
	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the per-SH active-RB bits into one global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* No harvesting (all RBs present) or nothing enabled at all: the
	 * plain config is written broadcast; otherwise remap around the
	 * harvested RBs per shader engine. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3810
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMID sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
/* Base value replicated into both halves of SH_MEM_BASES below. */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* VMIDs 8..15 are reserved for compute (KFD); 0..7 belong to graphics. */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default mtype, private apertures go through the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* srbm_mutex serializes SRBM-indexed (per-VMID) register access. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 disabled: base > limit. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore VMID 0 selection. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3855
3856 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3857 {
3858         switch (adev->asic_type) {
3859         default:
3860                 adev->gfx.config.double_offchip_lds_buf = 1;
3861                 break;
3862         case CHIP_CARRIZO:
3863         case CHIP_STONEY:
3864                 adev->gfx.config.double_offchip_lds_buf = 0;
3865                 break;
3866         }
3867 }
3868
/*
 * gfx_v8_0_gpu_init - one-time GFX block setup for VI parts
 *
 * @adev: amdgpu_device pointer
 *
 * Programs address/tiling configuration, discovers RBs and CUs, sets up the
 * per-VMID SH_MEM registers, and programs the SC FIFO sizes and SPI
 * arbitration priorities via broadcast writes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* The same addressing layout is mirrored into GB, HDP and DMIF. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default mtype, bases at 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* Other VMIDs: non-coherent default mtype, bases set
			 * from the shared aperture's top 16 bits. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 disabled: base > limit. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Give all four pipe-order timestamps equal (level 2) priority. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3949
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-SE/SH CU-master busy register for each shader unit, then the
 * non-CU master busy bits, each bounded by adev->usec_timeout microseconds.
 * Times out silently - callers get no error indication.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* Back to broadcast before dropping the index lock. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters (SE/GC/TC0/TC1) as well. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3979
3980 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3981                                                bool enable)
3982 {
3983         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3984
3985         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3986         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3987         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3988         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3989
3990         WREG32(mmCP_INT_CNTL_RING0, tmp);
3991 }
3992
3993 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3994 {
3995         /* csib */
3996         WREG32(mmRLC_CSIB_ADDR_HI,
3997                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3998         WREG32(mmRLC_CSIB_ADDR_LO,
3999                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4000         WREG32(mmRLC_CSIB_LENGTH,
4001                         adev->gfx.rlc.clear_state_size);
4002 }
4003
/*
 * gfx_v8_0_parse_ind_reg_list - parse the indirect section of an RLC
 * register list format blob.
 *
 * Walks @register_list_format from @ind_offset up to @list_size.
 * Each indirect entry (terminated by a 0xFFFFFFFF marker) has its
 * starting offset recorded in @ind_start_offsets, and the index
 * register it references is collected into @unique_indices; the blob
 * entry is rewritten in place to hold the slot number of its index
 * register.  @indices_count / @offset_count are updated to the number
 * of slots used; @max_indices / @max_offset bound the arrays.
 *
 * Fix vs. the original: the BUG_ON bounds checks ran *after* the store
 * and increment, which (a) only caught an overflow after the write had
 * already happened and (b) spuriously fired when exactly the last valid
 * array slot was used.  Check before storing instead.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* bounds check before the store (see header comment) */
			BUG_ON(*offset_count >= max_offset);
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* end-of-entry marker: the next dword starts a new entry */
			new_entry = true;
			continue;
		}

		/* skip the offset/value pair to reach the index register */
		ind_offset += 2;

		/* look for a matching index already collected */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* not seen before: append, checking bounds first */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* rewrite the blob entry to the slot number of its index */
		register_list_format[ind_offset] = indices;
	}
}
4053
/*
 * gfx_v8_0_init_save_restore_list - program the RLC save/restore lists
 *
 * Parses the firmware-provided register list format blob, then uploads
 * the save/restore list into SRM ARAM, the (rewritten) indirect list
 * and its size into GPM scratch, the per-entry starting offsets, and
 * finally the collected unique index registers into the SRM index
 * control register pairs.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy of the register
 * list format cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: parsing rewrites indirect entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list: auto-increment writes into SRM ARAM */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list goes into GPM scratch at the fw-provided offset */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is programmed in dword pairs, hence the extra >> 1 */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: addr/data register pairs laid out contiguously;
	 * low 18 bits are the register offset, bits 20+ the data
	 * (split presumably matches the index encoding — from hw docs) */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4117
/* Turn on the RLC save/restore machine (lists must be programmed first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4122
/*
 * gfx_v8_0_init_power_gating - program static power-gating parameters
 *
 * Sets the RB wptr idle poll count, the RLC power up/down, command
 * propagation and memory sleep delays, the SERDES command delay and the
 * GRBM register save idle threshold.  The constants are hardware tuning
 * values — NOTE(review): taken as-is, not derivable from this file.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4139
/* Enable/disable SMU clock slow-down while powering a block up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4145
/* Enable/disable SMU clock slow-down while powering a block down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4151
/* Enable CP power gating; note the register field is a *disable* bit,
 * hence the inverted value. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4156
4157 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4158 {
4159         if ((adev->asic_type == CHIP_CARRIZO) ||
4160             (adev->asic_type == CHIP_STONEY)) {
4161                 gfx_v8_0_init_csb(adev);
4162                 gfx_v8_0_init_save_restore_list(adev);
4163                 gfx_v8_0_enable_save_restore_machine(adev);
4164                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4165                 gfx_v8_0_init_power_gating(adev);
4166                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4167         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4168                    (adev->asic_type == CHIP_POLARIS12)) {
4169                 gfx_v8_0_init_csb(adev);
4170                 gfx_v8_0_init_save_restore_list(adev);
4171                 gfx_v8_0_enable_save_restore_machine(adev);
4172                 gfx_v8_0_init_power_gating(adev);
4173         }
4174
4175 }
4176
/* Halt the RLC F32 core, mask the gui idle interrupts and wait for the
 * RLC serdes to go idle. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4184
/* Pulse the RLC soft reset; the 50us settle delays bracket the assert
 * and de-assert. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4193
/* Re-enable the RLC F32 core.  On dGPUs the gui idle interrupt can be
 * re-armed immediately; APUs do it only after CP init (see comment). */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4204
/*
 * gfx_v8_0_rlc_load_microcode - upload RLC_G firmware via MMIO
 *
 * Streams the ucode image into the RLC GPM ucode RAM through the
 * ADDR/DATA register pair, then writes the firmware version to the
 * ADDR register to finish the load.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware was fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload starts at the header-declared offset */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4228
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up sequence
 *
 * Stops the RLC, disables coarse clock gating (CGCG/CGLS, plus the 3D
 * variant on Polaris) and power gating, resets the RLC, reprograms the
 * power-gating state and, unless powerplay/SMU owns firmware loading,
 * loads the RLC microcode before restarting the RLC.
 *
 * Returns 0 on success or a negative error code on fw load failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* low two bits are the 3D CGCG/CGLS enables —
		 * presumably mirrors the masks above; confirm vs regspec */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loaded the fw; just wait for completion */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4273
4274 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4275 {
4276         int i;
4277         u32 tmp = RREG32(mmCP_ME_CNTL);
4278
4279         if (enable) {
4280                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4281                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4282                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4283         } else {
4284                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4285                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4286                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4287                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4288                         adev->gfx.gfx_ring[i].ready = false;
4289         }
4290         WREG32(mmCP_ME_CNTL, tmp);
4291         udelay(50);
4292 }
4293
4294 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4295 {
4296         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4297         const struct gfx_firmware_header_v1_0 *ce_hdr;
4298         const struct gfx_firmware_header_v1_0 *me_hdr;
4299         const __le32 *fw_data;
4300         unsigned i, fw_size;
4301
4302         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4303                 return -EINVAL;
4304
4305         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4306                 adev->gfx.pfp_fw->data;
4307         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4308                 adev->gfx.ce_fw->data;
4309         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4310                 adev->gfx.me_fw->data;
4311
4312         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4313         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4314         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4315
4316         gfx_v8_0_cp_gfx_enable(adev, false);
4317
4318         /* PFP */
4319         fw_data = (const __le32 *)
4320                 (adev->gfx.pfp_fw->data +
4321                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4322         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4323         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4324         for (i = 0; i < fw_size; i++)
4325                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4326         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4327
4328         /* CE */
4329         fw_data = (const __le32 *)
4330                 (adev->gfx.ce_fw->data +
4331                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4332         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4333         WREG32(mmCP_CE_UCODE_ADDR, 0);
4334         for (i = 0; i < fw_size; i++)
4335                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4336         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4337
4338         /* ME */
4339         fw_data = (const __le32 *)
4340                 (adev->gfx.me_fw->data +
4341                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4342         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4343         WREG32(mmCP_ME_RAM_WADDR, 0);
4344         for (i = 0; i < fw_size; i++)
4345                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4346         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4347
4348         return 0;
4349 }
4350
4351 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4352 {
4353         u32 count = 0;
4354         const struct cs_section_def *sect = NULL;
4355         const struct cs_extent_def *ext = NULL;
4356
4357         /* begin clear state */
4358         count += 2;
4359         /* context control state */
4360         count += 3;
4361
4362         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4363                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4364                         if (sect->id == SECT_CONTEXT)
4365                                 count += 2 + ext->reg_count;
4366                         else
4367                                 return 0;
4368                 }
4369         }
4370         /* pa_sc_raster_config/pa_sc_raster_config1 */
4371         count += 4;
4372         /* end clear state */
4373         count += 2;
4374         /* clear state */
4375         count += 2;
4376
4377         return count;
4378 }
4379
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx ring with the clear state
 *
 * Programs basic CP configuration, un-halts the gfx CP and submits the
 * PM4 preamble: golden context (clear state) registers from vi_cs_data,
 * asic-specific PA_SC_RASTER_CONFIG values, a CLEAR_STATE packet and
 * the CE partition bases.  The packet count must match
 * gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or a negative error code if the ring could not
 * be locked.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay every golden context register extent */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-asic golden raster config values —
	 * NOTE(review): hw-specific constants, not derivable here */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4472
/*
 * gfx_v8_0_cp_gfx_resume - bring up the gfx ring buffer
 *
 * Programs CP RB0 (size, pointers, writeback addresses, base),
 * configures the gfx doorbell on asics that have one, then starts the
 * ring and runs a ring test.
 *
 * Returns 0 on success or the ring test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RPTR_WR_ENA lets us force the read pointer while we set up */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before dropping RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the doorbell aperture to the gfx ring range */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4557
4558 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4559 {
4560         int i;
4561
4562         if (enable) {
4563                 WREG32(mmCP_MEC_CNTL, 0);
4564         } else {
4565                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4566                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4567                         adev->gfx.compute_ring[i].ready = false;
4568         }
4569         udelay(50);
4570 }
4571
/*
 * gfx_v8_0_cp_compute_load_microcode - load MEC (and optional MEC2) fw
 *
 * Halts the compute CP, streams the MEC1 image into its ucode RAM and,
 * if a separate MEC2 image was fetched, streams that too.  Writing the
 * fw version to the ADDR register finalizes each load.
 *
 * Returns 0 on success, -EINVAL when the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4617
4618 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4619 {
4620         int i, r;
4621
4622         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4623                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4624
4625                 if (ring->mqd_obj) {
4626                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4627                         if (unlikely(r != 0))
4628                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4629
4630                         amdgpu_bo_unpin(ring->mqd_obj);
4631                         amdgpu_bo_unreserve(ring->mqd_obj);
4632
4633                         amdgpu_bo_unref(&ring->mqd_obj);
4634                         ring->mqd_obj = NULL;
4635                         ring->mqd_ptr = NULL;
4636                         ring->mqd_gpu_addr = 0;
4637                 }
4638         }
4639 }
4640
4641 /* KIQ functions */
/*
 * gfx_v8_0_kiq_setting - tell the RLC which queue is the KIQ
 *
 * Encodes me/pipe/queue into the low byte of RLC_CP_SCHEDULERS, then
 * writes again with bit 0x80 set — presumably a valid/activate bit
 * that must be raised only after the queue id is latched (the two
 * separate writes look intentional; confirm against RLC docs).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4655
4656 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4657 {
4658         amdgpu_ring_alloc(ring, 8);
4659         /* set resources */
4660         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4661         amdgpu_ring_write(ring, 0);     /* vmid_mask:0 queue_type:0 (KIQ) */
4662         amdgpu_ring_write(ring, 0x000000FF);    /* queue mask lo */
4663         amdgpu_ring_write(ring, 0);     /* queue mask hi */
4664         amdgpu_ring_write(ring, 0);     /* gws mask lo */
4665         amdgpu_ring_write(ring, 0);     /* gws mask hi */
4666         amdgpu_ring_write(ring, 0);     /* oac mask */
4667         amdgpu_ring_write(ring, 0);     /* gds heap base:0, gds heap size:0 */
4668         amdgpu_ring_commit(ring);
4669         udelay(50);
4670 }
4671
4672 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4673                                    struct amdgpu_ring *ring)
4674 {
4675         struct amdgpu_device *adev = kiq_ring->adev;
4676         uint64_t mqd_addr, wptr_addr;
4677
4678         mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4679         wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4680         amdgpu_ring_alloc(kiq_ring, 8);
4681
4682         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4683         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4684         amdgpu_ring_write(kiq_ring, 0x21010000);
4685         amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4686                         (ring->queue << 26) |
4687                         (ring->pipe << 29) |
4688                         ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4689         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4690         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4691         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4692         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4693         amdgpu_ring_commit(kiq_ring);
4694         udelay(50);
4695 }
4696
4697 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4698 {
4699         struct amdgpu_device *adev = ring->adev;
4700         struct vi_mqd *mqd = ring->mqd_ptr;
4701         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4702         uint32_t tmp;
4703
4704         mqd->header = 0xC0310800;
4705         mqd->compute_pipelinestat_enable = 0x00000001;
4706         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4707         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4708         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4709         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4710         mqd->compute_misc_reserved = 0x00000003;
4711
4712         eop_base_addr = ring->eop_gpu_addr >> 8;
4713         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4714         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4715
4716         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4717         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4718         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4719                         (order_base_2(MEC_HPD_SIZE / 4) - 1));
4720
4721         mqd->cp_hqd_eop_control = tmp;
4722
4723         /* enable doorbell? */
4724         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4725
4726         if (ring->use_doorbell)
4727                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4728                                          DOORBELL_EN, 1);
4729         else
4730                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4731                                          DOORBELL_EN, 0);
4732
4733         mqd->cp_hqd_pq_doorbell_control = tmp;
4734
4735         /* disable the queue if it's active */
4736         mqd->cp_hqd_dequeue_request = 0;
4737         mqd->cp_hqd_pq_rptr = 0;
4738         mqd->cp_hqd_pq_wptr = 0;
4739
4740         /* set the pointer to the MQD */
4741         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4742         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4743
4744         /* set MQD vmid to 0 */
4745         tmp = RREG32(mmCP_MQD_CONTROL);
4746         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4747         mqd->cp_mqd_control = tmp;
4748
4749         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4750         hqd_gpu_addr = ring->gpu_addr >> 8;
4751         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4752         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4753
4754         /* set up the HQD, this is similar to CP_RB0_CNTL */
4755         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4756         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4757                             (order_base_2(ring->ring_size / 4) - 1));
4758         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4759                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4760 #ifdef __BIG_ENDIAN
4761         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4762 #endif
4763         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4764         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4765         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4766         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4767         mqd->cp_hqd_pq_control = tmp;
4768
4769         /* set the wb address whether it's enabled or not */
4770         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4771         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4772         mqd->cp_hqd_pq_rptr_report_addr_hi =
4773                 upper_32_bits(wb_gpu_addr) & 0xffff;
4774
4775         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4776         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4777         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4778         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4779
4780         tmp = 0;
4781         /* enable the doorbell if requested */
4782         if (ring->use_doorbell) {
4783                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4784                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4785                                 DOORBELL_OFFSET, ring->doorbell_index);
4786
4787                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4788                                          DOORBELL_EN, 1);
4789                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4790                                          DOORBELL_SOURCE, 0);
4791                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4792                                          DOORBELL_HIT, 0);
4793         }
4794
4795         mqd->cp_hqd_pq_doorbell_control = tmp;
4796
4797         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4798         ring->wptr = 0;
4799         mqd->cp_hqd_pq_wptr = ring->wptr;
4800         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4801
4802         /* set the vmid for the queue */
4803         mqd->cp_hqd_vmid = 0;
4804
4805         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4806         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4807         mqd->cp_hqd_persistent_state = tmp;
4808
4809         /* activate the queue */
4810         mqd->cp_hqd_active = 1;
4811
4812         return 0;
4813 }
4814
/*
 * gfx_v8_0_kiq_init_register() - commit an initialized MQD to the HQD registers.
 * @ring: ring whose mqd_ptr points at a populated struct vi_mqd
 *
 * Writes the MQD image built by gfx_v8_0_mqd_init() into the CP_HQD_* /
 * CP_MQD_* registers of the currently selected queue slot and activates it.
 * NOTE(review): the registers written here are per-queue, so the caller is
 * expected to hold srbm_mutex and to have run vi_srbm_select() for this
 * ring's me/pipe/queue beforehand (see gfx_v8_0_kiq_init_queue) -- confirm
 * at any new call site.
 *
 * Returns 0 unconditionally; the dequeue wait below times out silently.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint32_t tmp;
	int j;

	/* disable wptr polling while we reprogram the queue */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	/* program the EOP buffer base address */
	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? (value was decided in gfx_v8_0_mqd_init) */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active: request dequeue, then poll
	 * CP_HQD_ACTIVE for up to usec_timeout microseconds */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		/* restore the (zeroed) dequeue/rptr/wptr state from the MQD */
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested; the MEC doorbell aperture is only
	 * (re)programmed on the APU/Fiji parts listed here */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
						AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
						AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	/* globally enable doorbell reception for the CP */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
4905
/*
 * gfx_v8_0_kiq_init_queue() - initialize (or restore) one KIQ/compute queue.
 * @ring: the KIQ ring or one of the compute rings; ring->mqd_ptr must be a
 *        kmapped struct vi_mqd (see gfx_v8_0_kiq_resume)
 *
 * On a normal init the MQD is built from scratch, written to the HQD (KIQ
 * only -- compute queues are mapped via the KIQ packet below), and a backup
 * copy is kept so a GPU reset can restore it without re-deriving state.
 * On the GPU-reset path the MQD is restored from that backup instead.
 *
 * Returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct vi_mqd *mqd = ring->mqd_ptr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	/* NOTE(review): the KIQ uses the backup slot one past the compute
	 * rings; presumably mqd_backup[] has AMDGPU_MAX_COMPUTE_RINGS + 1
	 * entries -- confirm against the struct definition. */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq) {
		gfx_v8_0_kiq_setting(&kiq->ring);
	} else {
		/* compute rings index their backup by position in the array */
		mqd_idx = ring - &adev->gfx.compute_ring[0];
	}

	if (!adev->gfx.in_reset) {
		/* fresh init: build the MQD under srbm_mutex with this
		 * ring's me/pipe/queue selected */
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		if (is_kiq)
			gfx_v8_0_kiq_init_register(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* snapshot the MQD so a later reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		/* the KIQ's HQD must be reprogrammed directly; compute
		 * queues are remapped via the KIQ further down */
		if (is_kiq) {
		    mutex_lock(&adev->srbm_mutex);
		    vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		    gfx_v8_0_kiq_init_register(ring);
		    vi_srbm_select(adev, 0, 0, 0, 0);
		    mutex_unlock(&adev->srbm_mutex);
		}
	}

	/* enable the KIQ itself, or ask the KIQ to map this compute queue */
	if (is_kiq)
		gfx_v8_0_kiq_enable(ring);
	else
		gfx_v8_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}
4957
/*
 * gfx_v8_0_kiq_resume() - bring up the KIQ and all compute rings (SR-IOV path).
 * @adev: amdgpu device
 *
 * Enables the compute CP, then for the KIQ ring followed by every compute
 * ring: reserves and kmaps the ring's MQD BO, initializes the queue via
 * gfx_v8_0_kiq_init_queue(), unmaps, and ring-tests it.
 *
 * Error handling differs subtly between the two phases: a KIQ test failure
 * aborts immediately (nothing can be mapped without the KIQ), while a
 * compute-ring test failure only clears that ring's ready flag and the
 * loop continues with the next ring.
 *
 * Returns 0 on success or the first fatal error code.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	/* phase 1: the KIQ ring itself */
	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* phase 2: every compute ring, mapped through the now-live KIQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kiq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;

		/* a failed ring test is non-fatal here: the ring is just
		 * left unusable (ready = false) */
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
5013
/*
 * gfx_v8_0_cp_compute_resume() - bring up all compute (MEC) rings, bare-metal path.
 * @adev: amdgpu device
 *
 * For each compute ring this (lazily) allocates and pins an MQD buffer in
 * GTT, builds the MQD in place while simultaneously programming the HQD
 * registers for that queue (under srbm_mutex with the queue selected),
 * then enables the compute CP and ring-tests every ring.
 *
 * Unlike the KIQ path (gfx_v8_0_kiq_resume), queues here are programmed
 * directly through the registers rather than mapped via KIQ packets.
 *
 * Returns 0, or a negative error code if MQD BO setup fails; individual
 * ring-test failures only clear that ring's ready flag.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily create the MQD BO on first resume; it is reused on
		 * subsequent suspend/resume cycles */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		/* NOTE(review): on pin/kmap failure the BO is still reserved
		 * when gfx_v8_0_cp_compute_fini() runs -- verify fini copes
		 * with an already-reserved mqd_obj. */
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SE/SH compute resources for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this queue's register aperture; held until the end
		 * of the loop body */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* each ring gets its own MEC_HPD_SIZE slice of the shared
		 * EOP buffer; the register takes a 256-byte-aligned address */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the just-written EOP registers into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: request dequeue and poll
		 * until the HQD goes inactive or we hit usec_timeout */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested; the MEC doorbell
		 * aperture is only programmed on the ASICs listed here */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		/* enable the GENERIC2 interrupt on ME1 pipe 3 for these ASICs
		 * -- presumably needed for their doorbell/wave handling;
		 * TODO confirm why only pipe 3 */
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell reception for the CP */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test everything; failures are non-fatal and only leave the
	 * affected ring marked not-ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5263
5264 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5265 {
5266         int r;
5267
5268         if (!(adev->flags & AMD_IS_APU))
5269                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5270
5271         if (!adev->pp_enabled) {
5272                 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5273                         /* legacy firmware loading */
5274                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5275                         if (r)
5276                                 return r;
5277
5278                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5279                         if (r)
5280                                 return r;
5281                 } else {
5282                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5283                                                         AMDGPU_UCODE_ID_CP_CE);
5284                         if (r)
5285                                 return -EINVAL;
5286
5287                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5288                                                         AMDGPU_UCODE_ID_CP_PFP);
5289                         if (r)
5290                                 return -EINVAL;
5291
5292                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5293                                                         AMDGPU_UCODE_ID_CP_ME);
5294                         if (r)
5295                                 return -EINVAL;
5296
5297                         if (adev->asic_type == CHIP_TOPAZ) {
5298                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5299                                 if (r)
5300                                         return r;
5301                         } else {
5302                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5303                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5304                                 if (r)
5305                                         return -EINVAL;
5306                         }
5307                 }
5308         }
5309
5310         r = gfx_v8_0_cp_gfx_resume(adev);
5311         if (r)
5312                 return r;
5313
5314         if (amdgpu_sriov_vf(adev))
5315                 r = gfx_v8_0_kiq_resume(adev);
5316         else
5317                 r = gfx_v8_0_cp_compute_resume(adev);
5318         if (r)
5319                 return r;
5320
5321         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5322
5323         return 0;
5324 }
5325
/* Enable or disable the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5331
/* hw_init callback: program static GFX state, then start RLC and CP. */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* golden register settings and static engine configuration first */
	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* bring up the RLC, then the CP; stop at the first failure */
	r = gfx_v8_0_rlc_resume(adev);
	if (r == 0)
		r = gfx_v8_0_cp_resume(adev);

	return r;
}
5348
5349 static int gfx_v8_0_hw_fini(void *handle)
5350 {
5351         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5352
5353         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5354         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5355         if (amdgpu_sriov_vf(adev)) {
5356                 pr_debug("For SRIOV client, shouldn't do anything.\n");
5357                 return 0;
5358         }
5359         gfx_v8_0_cp_enable(adev, false);
5360         gfx_v8_0_rlc_stop(adev);
5361         gfx_v8_0_cp_compute_fini(adev);
5362
5363         amdgpu_set_powergating_state(adev,
5364                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5365
5366         return 0;
5367 }
5368
/* suspend callback: identical to a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5375
/* resume callback: identical to a full hardware bring-up. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5382
5383 static bool gfx_v8_0_is_idle(void *handle)
5384 {
5385         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5386
5387         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5388                 return false;
5389         else
5390                 return true;
5391 }
5392
5393 static int gfx_v8_0_wait_for_idle(void *handle)
5394 {
5395         unsigned i;
5396         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5397
5398         for (i = 0; i < adev->usec_timeout; i++) {
5399                 if (gfx_v8_0_is_idle(handle))
5400                         return 0;
5401
5402                 udelay(1);
5403         }
5404         return -ETIMEDOUT;
5405 }
5406
/*
 * gfx_v8_0_check_soft_reset() - decide whether GFX needs a soft reset.
 * @handle: amdgpu_device handle
 *
 * Reads GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and accumulates the
 * GRBM/SRBM soft-reset bit masks for every engine found busy.  The masks
 * are stashed in adev->gfx.{grbm,srbm}_soft_reset for the later
 * pre/soft/post reset stages to consume.
 *
 * Returns true if any reset bit was set (reset needed), false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy graphics-pipeline unit forces a CP+GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: a stuck RLC gets its own reset bit */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/gfx) resets all three
	 * plus the GRBM itself */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish the masks for the soft-reset stages and report the verdict */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5468
/*
 * gfx_v8_0_inactive_hqd - request dequeue of the HQD backing @ring and
 * wait for it to go idle.
 *
 * Selects the ring's me/pipe/queue via SRBM; if the hardware queue
 * descriptor is active, writes a dequeue request (type 2) and polls
 * CP_HQD_ACTIVE for up to adev->usec_timeout microseconds.
 *
 * NOTE(review): the SRBM selection is not restored to (0, 0, 0, 0) before
 * returning — confirm that callers reselect afterwards.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		/* issue dequeue request type 2 for this queue */
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		/* busy-wait until the HQD reports inactive, or time out */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5488
5489 static int gfx_v8_0_pre_soft_reset(void *handle)
5490 {
5491         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5492         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5493
5494         if ((!adev->gfx.grbm_soft_reset) &&
5495             (!adev->gfx.srbm_soft_reset))
5496                 return 0;
5497
5498         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5499         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5500
5501         /* stop the rlc */
5502         gfx_v8_0_rlc_stop(adev);
5503
5504         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5505             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5506                 /* Disable GFX parsing/prefetching */
5507                 gfx_v8_0_cp_gfx_enable(adev, false);
5508
5509         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5510             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5511             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5512             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5513                 int i;
5514
5515                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5516                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5517
5518                         gfx_v8_0_inactive_hqd(adev, ring);
5519                 }
5520                 /* Disable MEC parsing/prefetching */
5521                 gfx_v8_0_cp_compute_enable(adev, false);
5522         }
5523
5524        return 0;
5525 }
5526
/*
 * gfx_v8_0_soft_reset - perform the actual GRBM/SRBM soft reset pulse.
 *
 * Uses the reset bit masks cached by the check_soft_reset phase.  The GMC
 * GFX path is stalled and cleared around the reset pulse, and each reset
 * register is read back after writing before the settle delays.
 *
 * Returns 0 (also when no reset was flagged).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged -> nothing to reset */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall/clear the GMC GFX path before pulsing reset
	 * (always true here given the early return above) */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);	/* read back after write */

		udelay(50);

		/* deassert the reset bits */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/delay/deassert dance for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* unstall the GMC GFX path again */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5588
/*
 * gfx_v8_0_init_hqd - clear the HQD state of @ring after a soft reset.
 *
 * Selects the ring's me/pipe/queue via SRBM, clears any pending dequeue
 * request and resets the queue read/write pointers, then restores the
 * default SRBM selection.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5598
5599 static int gfx_v8_0_post_soft_reset(void *handle)
5600 {
5601         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5602         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5603
5604         if ((!adev->gfx.grbm_soft_reset) &&
5605             (!adev->gfx.srbm_soft_reset))
5606                 return 0;
5607
5608         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5609         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5610
5611         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5612             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5613                 gfx_v8_0_cp_gfx_resume(adev);
5614
5615         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5616             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5617             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5618             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5619                 int i;
5620
5621                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5622                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5623
5624                         gfx_v8_0_init_hqd(adev, ring);
5625                 }
5626                 gfx_v8_0_cp_compute_resume(adev);
5627         }
5628         gfx_v8_0_rlc_start(adev);
5629
5630         return 0;
5631 }
5632
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the mutex serializes capture + readout so the LSB/MSB pair is
	 * read from a single snapshot */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	/* trigger a capture of the current counter value */
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5652
/*
 * gfx_v8_0_ring_emit_gds_switch - emit packets that program the per-VMID
 * GDS base/size, GWS and OA allocation registers.
 *
 * The byte-based arguments are converted to the hardware's register units
 * via the AMDGPU_GDS/GWS/OA_SHIFT constants, then written with WRITE_DATA
 * packets to the per-VMID register offsets from amdgpu_gds_reg_offset[].
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into register units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	/* size goes in the SIZE field, base in the low bits */
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	/* contiguous mask of oa_size bits starting at bit oa_base */
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5700
5701 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5702 {
5703         WREG32(mmSQ_IND_INDEX,
5704                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5705                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5706                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5707                 (SQ_IND_INDEX__FORCE_READ_MASK));
5708         return RREG32(mmSQ_IND_DATA);
5709 }
5710
5711 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5712                            uint32_t wave, uint32_t thread,
5713                            uint32_t regno, uint32_t num, uint32_t *out)
5714 {
5715         WREG32(mmSQ_IND_INDEX,
5716                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5717                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5718                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5719                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5720                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5721                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5722         while (num--)
5723                 *(out++) = RREG32(mmSQ_IND_DATA);
5724 }
5725
5726 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5727 {
5728         /* type 0 wave data */
5729         dst[(*no_fields)++] = 0;
5730         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5731         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5732         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5733         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5734         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5735         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5736         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5737         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5738         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5739         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5740         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5741         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5742         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5743         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5744         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5745         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5746         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5747         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5748 }
5749
5750 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5751                                      uint32_t wave, uint32_t start,
5752                                      uint32_t size, uint32_t *dst)
5753 {
5754         wave_read_regs(
5755                 adev, simd, wave, 0,
5756                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5757 }
5758
5759
/* gfx helper callbacks exported to the core driver via adev->gfx.funcs
 * (installed in gfx_v8_0_early_init below) */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5766
5767 static int gfx_v8_0_early_init(void *handle)
5768 {
5769         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5770
5771         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5772         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5773         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5774         gfx_v8_0_set_ring_funcs(adev);
5775         gfx_v8_0_set_irq_funcs(adev);
5776         gfx_v8_0_set_gds_init(adev);
5777         gfx_v8_0_set_rlc_funcs(adev);
5778
5779         return 0;
5780 }
5781
/*
 * gfx_v8_0_late_init - second-stage init that needs IRQs and the IB pool.
 *
 * Enables the privileged-register and privileged-instruction fault
 * interrupts, runs the EDC GPR workaround (which submits IBs, hence the
 * placement in late init), then gates GFX power.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5805
5806 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5807                                                        bool enable)
5808 {
5809         if ((adev->asic_type == CHIP_POLARIS11) ||
5810             (adev->asic_type == CHIP_POLARIS12))
5811                 /* Send msg to SMU via Powerplay */
5812                 amdgpu_set_powergating_state(adev,
5813                                              AMD_IP_BLOCK_TYPE_SMC,
5814                                              enable ?
5815                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5816
5817         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5818 }
5819
/* Toggle dynamic per-CU GFX powergating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5825
/* Toggle quick GFX powergating via RLC_PG_CNTL (Polaris11 path). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5831
/* Toggle coarse-grain GFX powergating via RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5837
/* Toggle GFX pipeline powergating via RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5847
5848 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5849                                           bool enable)
5850 {
5851         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5852                 cz_enable_gfx_cg_power_gating(adev, true);
5853                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5854                         cz_enable_gfx_pipeline_power_gating(adev, true);
5855         } else {
5856                 cz_enable_gfx_cg_power_gating(adev, false);
5857                 cz_enable_gfx_pipeline_power_gating(adev, false);
5858         }
5859 }
5860
5861 static int gfx_v8_0_set_powergating_state(void *handle,
5862                                           enum amd_powergating_state state)
5863 {
5864         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5865         bool enable = (state == AMD_PG_STATE_GATE);
5866
5867         if (amdgpu_sriov_vf(adev))
5868                 return 0;
5869
5870         switch (adev->asic_type) {
5871         case CHIP_CARRIZO:
5872         case CHIP_STONEY:
5873
5874                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5875                         cz_enable_sck_slow_down_on_power_up(adev, true);
5876                         cz_enable_sck_slow_down_on_power_down(adev, true);
5877                 } else {
5878                         cz_enable_sck_slow_down_on_power_up(adev, false);
5879                         cz_enable_sck_slow_down_on_power_down(adev, false);
5880                 }
5881                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5882                         cz_enable_cp_power_gating(adev, true);
5883                 else
5884                         cz_enable_cp_power_gating(adev, false);
5885
5886                 cz_update_gfx_cg_power_gating(adev, enable);
5887
5888                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5889                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5890                 else
5891                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5892
5893                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5894                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5895                 else
5896                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5897                 break;
5898         case CHIP_POLARIS11:
5899         case CHIP_POLARIS12:
5900                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5901                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5902                 else
5903                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5904
5905                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5906                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5907                 else
5908                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5909
5910                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5911                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5912                 else
5913                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5914                 break;
5915         default:
5916                 break;
5917         }
5918
5919         return 0;
5920 }
5921
/*
 * gfx_v8_0_get_clockgating_state - report which GFX clockgating features
 * are currently active, by inspecting the hardware registers.
 * @flags: AMD_CG_SUPPORT_* bits are OR'ed into this mask.
 *
 * NOTE(review): under SR-IOV *flags is zeroed but the register reads
 * below still run and may OR bits back in — confirm this is intended.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS implies MGLS as well */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS implies MGLS as well */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5963
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM serdes command to the CU and
 * non-CU masters on all shader engines.
 *
 * Broadcasts the SE/SH selection, enables all bits in both serdes master
 * masks, then builds the RLC_SERDES_WR_CTRL word carrying @cmd and
 * @reg_addr with BPM_ADDR 0xff.
 *
 * NOTE(review): the Stoney branch does not clear the BPM_DATA and
 * REG_ADDR fields before OR-ing the new values in — confirm that this
 * difference from the non-Stoney path is intentional.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
6004
/* RLC safe-mode request protocol: message codes plus the REQ/MESSAGE
 * field layout of RLC_GPR_REG2. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
6011
/*
 * iceland_enter_rlc_safe_mode - ask the RLC to put the GFX block into
 * safe mode before CG/PG register updates.
 *
 * A no-op when the RLC F32 core is disabled or when neither CGCG nor
 * MGCG is supported.  Writes CMD=1/MESSAGE=1 to RLC_SAFE_MODE, then polls
 * for the GFX clock/power status bits and for the CMD bit to clear.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC F32 core is not running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1: request safe mode entry */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the CMD bit to clear (acknowledged by the RLC) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
6045
/*
 * iceland_exit_rlc_safe_mode - release the RLC safe mode requested by
 * iceland_enter_rlc_safe_mode.
 *
 * A no-op when the RLC F32 core is disabled.  Writes CMD=1/MESSAGE=0 to
 * RLC_SAFE_MODE when safe mode was entered, then waits for the CMD bit to
 * clear.  Note the final poll runs even when safe mode was never entered.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC F32 core is not running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0: request safe mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the CMD bit to clear (acknowledged by the RLC) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
6070
/* RLC safe-mode enter/exit callbacks used via adev->gfx.rlc.funcs */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
6075
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable or disable GFX
 * medium-grain clock gating (MGCG) and related light-sleep features.
 *
 * Runs with the RLC in safe mode.  The numbered comments below follow the
 * required programming sequence: memory light sleep (RLC/CP), the RLC
 * MGCG override register, serdes commands to the CU/non-CU masters, and
 * CGTS (tree shade) gating.  The disable path applies the overrides and
 * switches the light-sleep features off again.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit, discrete parts clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS light sleep only when MGLS + CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6179
/* Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the GFX block.  The RLC is held in safe mode
 * for the whole sequence; the numbered steps mirror the programming
 * guide ordering and must not be reordered.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override so the RLC may gate the clocks */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also drop the CGLS override when CGLS is supported */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		/* only touch the register if anything actually changed */
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* set the CGCG/CGLS overrides so the RLC stops gating */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg (repeated reads are
		 * intentional, presumably to guarantee the block is awake —
		 * NOTE(review): confirm against the programming guide)
		 */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6270 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6271                                             bool enable)
6272 {
6273         if (enable) {
6274                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6275                  * ===  MGCG + MGLS + TS(CG/LS) ===
6276                  */
6277                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6278                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6279         } else {
6280                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6281                  * ===  CGCG + CGLS ===
6282                  */
6283                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6284                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6285         }
6286         return 0;
6287 }
6288
6289 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6290                                           enum amd_clockgating_state state)
6291 {
6292         uint32_t msg_id, pp_state = 0;
6293         uint32_t pp_support_state = 0;
6294         void *pp_handle = adev->powerplay.pp_handle;
6295
6296         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6297                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6298                         pp_support_state = PP_STATE_SUPPORT_LS;
6299                         pp_state = PP_STATE_LS;
6300                 }
6301                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6302                         pp_support_state |= PP_STATE_SUPPORT_CG;
6303                         pp_state |= PP_STATE_CG;
6304                 }
6305                 if (state == AMD_CG_STATE_UNGATE)
6306                         pp_state = 0;
6307
6308                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6309                                 PP_BLOCK_GFX_CG,
6310                                 pp_support_state,
6311                                 pp_state);
6312                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6313         }
6314
6315         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6316                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6317                         pp_support_state = PP_STATE_SUPPORT_LS;
6318                         pp_state = PP_STATE_LS;
6319                 }
6320
6321                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6322                         pp_support_state |= PP_STATE_SUPPORT_CG;
6323                         pp_state |= PP_STATE_CG;
6324                 }
6325
6326                 if (state == AMD_CG_STATE_UNGATE)
6327                         pp_state = 0;
6328
6329                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6330                                 PP_BLOCK_GFX_MG,
6331                                 pp_support_state,
6332                                 pp_state);
6333                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6334         }
6335
6336         return 0;
6337 }
6338
6339 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6340                                           enum amd_clockgating_state state)
6341 {
6342
6343         uint32_t msg_id, pp_state = 0;
6344         uint32_t pp_support_state = 0;
6345         void *pp_handle = adev->powerplay.pp_handle;
6346
6347         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6348                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6349                         pp_support_state = PP_STATE_SUPPORT_LS;
6350                         pp_state = PP_STATE_LS;
6351                 }
6352                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6353                         pp_support_state |= PP_STATE_SUPPORT_CG;
6354                         pp_state |= PP_STATE_CG;
6355                 }
6356                 if (state == AMD_CG_STATE_UNGATE)
6357                         pp_state = 0;
6358
6359                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6360                                 PP_BLOCK_GFX_CG,
6361                                 pp_support_state,
6362                                 pp_state);
6363                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6364         }
6365
6366         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6367                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6368                         pp_support_state = PP_STATE_SUPPORT_LS;
6369                         pp_state = PP_STATE_LS;
6370                 }
6371                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6372                         pp_support_state |= PP_STATE_SUPPORT_CG;
6373                         pp_state |= PP_STATE_CG;
6374                 }
6375                 if (state == AMD_CG_STATE_UNGATE)
6376                         pp_state = 0;
6377
6378                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6379                                 PP_BLOCK_GFX_3D,
6380                                 pp_support_state,
6381                                 pp_state);
6382                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6383         }
6384
6385         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6386                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6387                         pp_support_state = PP_STATE_SUPPORT_LS;
6388                         pp_state = PP_STATE_LS;
6389                 }
6390
6391                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6392                         pp_support_state |= PP_STATE_SUPPORT_CG;
6393                         pp_state |= PP_STATE_CG;
6394                 }
6395
6396                 if (state == AMD_CG_STATE_UNGATE)
6397                         pp_state = 0;
6398
6399                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6400                                 PP_BLOCK_GFX_MG,
6401                                 pp_support_state,
6402                                 pp_state);
6403                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6404         }
6405
6406         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6407                 pp_support_state = PP_STATE_SUPPORT_LS;
6408
6409                 if (state == AMD_CG_STATE_UNGATE)
6410                         pp_state = 0;
6411                 else
6412                         pp_state = PP_STATE_LS;
6413
6414                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6415                                 PP_BLOCK_GFX_RLC,
6416                                 pp_support_state,
6417                                 pp_state);
6418                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6419         }
6420
6421         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6422                 pp_support_state = PP_STATE_SUPPORT_LS;
6423
6424                 if (state == AMD_CG_STATE_UNGATE)
6425                         pp_state = 0;
6426                 else
6427                         pp_state = PP_STATE_LS;
6428                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6429                         PP_BLOCK_GFX_CP,
6430                         pp_support_state,
6431                         pp_state);
6432                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6433         }
6434
6435         return 0;
6436 }
6437
6438 static int gfx_v8_0_set_clockgating_state(void *handle,
6439                                           enum amd_clockgating_state state)
6440 {
6441         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6442
6443         if (amdgpu_sriov_vf(adev))
6444                 return 0;
6445
6446         switch (adev->asic_type) {
6447         case CHIP_FIJI:
6448         case CHIP_CARRIZO:
6449         case CHIP_STONEY:
6450                 gfx_v8_0_update_gfx_clock_gating(adev,
6451                                                  state == AMD_CG_STATE_GATE);
6452                 break;
6453         case CHIP_TONGA:
6454                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6455                 break;
6456         case CHIP_POLARIS10:
6457         case CHIP_POLARIS11:
6458         case CHIP_POLARIS12:
6459                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6460                 break;
6461         default:
6462                 break;
6463         }
6464         return 0;
6465 }
6466
6467 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6468 {
6469         return ring->adev->wb.wb[ring->rptr_offs];
6470 }
6471
6472 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6473 {
6474         struct amdgpu_device *adev = ring->adev;
6475
6476         if (ring->use_doorbell)
6477                 /* XXX check if swapping is necessary on BE */
6478                 return ring->adev->wb.wb[ring->wptr_offs];
6479         else
6480                 return RREG32(mmCP_RB0_WPTR);
6481 }
6482
6483 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6484 {
6485         struct amdgpu_device *adev = ring->adev;
6486
6487         if (ring->use_doorbell) {
6488                 /* XXX check if swapping is necessary on BE */
6489                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6490                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6491         } else {
6492                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6493                 (void)RREG32(mmCP_RB0_WPTR);
6494         }
6495 }
6496
/* Emit an HDP flush: a WAIT_REG_MEM packet that writes the ref value
 * into GPU_HDP_FLUSH_REQ and then polls GPU_HDP_FLUSH_DONE until the
 * matching bit is set.  Compute/KIQ rings select the per-MEC/per-pipe
 * done bit; the GFX ring uses CP0 and runs the wait on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* pick the DONE bit matching this MEC (me) and pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* unknown MEC: emit nothing */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* the 6 payload dwords below follow the fixed WAIT_REG_MEM layout */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6529
/* Emit a VGT flush: a VS_PARTIAL_FLUSH event followed by a VGT_FLUSH
 * event, each as an EVENT_WRITE packet.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6540
6541
/* Emit an HDP cache invalidate by writing 1 to HDP_DEBUG0 with a
 * WRITE_DATA packet (the register write triggers the invalidate).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |	/* me */
				 WRITE_DATA_DST_SEL(0) |	/* register */
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);	/* high bits of register address */
	amdgpu_ring_write(ring, 1);	/* value to write */

}
6553
/* Emit an indirect buffer on the GFX ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode, everything else the regular
 * INDIRECT_BUFFER.  The VMID goes into bits 24+ of the control dword;
 * ctx_switch is unused here.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	/* under SR-IOV, mark preemptible IBs so the CP can save state */
	if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
		control |= INDIRECT_BUFFER_PRE_ENB(1);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte swap */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6579
/* Emit an indirect buffer on a compute ring.  Same packet layout as the
 * GFX variant but always INDIRECT_BUFFER with the VALID bit set;
 * ctx_switch is unused.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* byte swap */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));	/* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6595
/* Emit a fence on the GFX ring via EVENT_WRITE_EOP: flush TC/TCL1
 * caches, write the 32- or 64-bit seq value to addr, and optionally
 * raise an interrupt (per flags).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* fence address, dword aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6616
/* Emit a pipeline sync: WAIT_REG_MEM polling the ring's fence memory
 * until it equals the latest sync_seq.  On the GFX ring the wait runs
 * on the PFP, on compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);		/* reference value */
	amdgpu_ring_write(ring, 0xffffffff);	/* mask */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6633
/* Emit a VM (GPUVM) flush: update the per-VMID page table base address,
 * request an invalidate of that VMID's TLB, wait for it to complete,
 * and on the GFX ring sync the PFP to the ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);	/* page directory base, 4K units */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6680
6681 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6682 {
6683         return ring->adev->wb.wb[ring->wptr_offs];
6684 }
6685
6686 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6687 {
6688         struct amdgpu_device *adev = ring->adev;
6689
6690         /* XXX check if swapping is necessary on BE */
6691         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6692         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6693 }
6694
/* Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1
 * caches, write the 32- or 64-bit seq value to addr, and optionally
 * raise an interrupt (per flags).  Note the dword layout differs from
 * the GFX EVENT_WRITE_EOP variant: sel bits come before the address.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* fence address, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6715
/* Emit a fence on the KIQ ring: a plain WRITE_DATA of the 32-bit seq
 * to addr (KIQ only allocates a 32-bit writeback slot), optionally
 * followed by a write to CPC_INT_STATUS to trigger the interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6740
/* Emit a SWITCH_BUFFER packet (used to toggle the CE/DE buffer pair). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6746
/* Emit a CONTEXT_CONTROL packet selecting which state blocks the CP
 * loads on the next context switch.  Under SR-IOV it also brackets the
 * packet with CE/DE metadata pointing at the appropriate CSA.
 * dw2 bit meanings below are the CONTEXT_CONTROL load-enable bits.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}
6784
/* Emit a COND_EXEC packet whose skip-count dword is left as a dummy
 * value; returns that dword's ring offset so the caller can patch in
 * the real count later via gfx_v8_0_ring_emit_patch_cond_exec().
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;	/* offset of the count dword */
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6797
6798 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6799 {
6800         unsigned cur;
6801
6802         BUG_ON(offset > ring->buf_mask);
6803         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6804
6805         cur = (ring->wptr & ring->buf_mask) - 1;
6806         if (likely(cur > offset))
6807                 ring->ring[offset] = cur - offset;
6808         else
6809                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6810 }
6811
6812
/* Emit a COPY_DATA packet that copies a register's value into the
 * writeback area at adev->virt.reg_val_offs.  NOTE(review): used with
 * adev->virt — presumably the KIQ-based register-read path under
 * SR-IOV; confirm against the callers.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);	/* high bits of src register */
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6828
/* Emit a WRITE_DATA packet that writes val into register reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);	/* high bits of register address */
	amdgpu_ring_write(ring, val);
}
6838
6839 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6840                                                  enum amdgpu_interrupt_state state)
6841 {
6842         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6843                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6844 }
6845
6846 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6847                                                      int me, int pipe,
6848                                                      enum amdgpu_interrupt_state state)
6849 {
6850         /*
6851          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6852          * handles the setting of interrupts for this specific pipe. All other
6853          * pipes' interrupts are set by amdkfd.
6854          */
6855
6856         if (me == 1) {
6857                 switch (pipe) {
6858                 case 0:
6859                         break;
6860                 default:
6861                         DRM_DEBUG("invalid pipe %d\n", pipe);
6862                         return;
6863                 }
6864         } else {
6865                 DRM_DEBUG("invalid me %d\n", me);
6866                 return;
6867         }
6868
6869         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6870                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6871 }
6872
6873 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6874                                              struct amdgpu_irq_src *source,
6875                                              unsigned type,
6876                                              enum amdgpu_interrupt_state state)
6877 {
6878         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6879                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6880
6881         return 0;
6882 }
6883
6884 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6885                                               struct amdgpu_irq_src *source,
6886                                               unsigned type,
6887                                               enum amdgpu_interrupt_state state)
6888 {
6889         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6890                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6891
6892         return 0;
6893 }
6894
6895 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6896                                             struct amdgpu_irq_src *src,
6897                                             unsigned type,
6898                                             enum amdgpu_interrupt_state state)
6899 {
6900         switch (type) {
6901         case AMDGPU_CP_IRQ_GFX_EOP:
6902                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6903                 break;
6904         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6905                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6906                 break;
6907         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6908                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6909                 break;
6910         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6911                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6912                 break;
6913         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6914                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6915                 break;
6916         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6917                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6918                 break;
6919         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6920                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6921                 break;
6922         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6923                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6924                 break;
6925         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6926                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6927                 break;
6928         default:
6929                 break;
6930         }
6931         return 0;
6932 }
6933
6934 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6935                             struct amdgpu_irq_src *source,
6936                             struct amdgpu_iv_entry *entry)
6937 {
6938         int i;
6939         u8 me_id, pipe_id, queue_id;
6940         struct amdgpu_ring *ring;
6941
6942         DRM_DEBUG("IH: CP EOP\n");
6943         me_id = (entry->ring_id & 0x0c) >> 2;
6944         pipe_id = (entry->ring_id & 0x03) >> 0;
6945         queue_id = (entry->ring_id & 0x70) >> 4;
6946
6947         switch (me_id) {
6948         case 0:
6949                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6950                 break;
6951         case 1:
6952         case 2:
6953                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6954                         ring = &adev->gfx.compute_ring[i];
6955                         /* Per-queue interrupt is supported for MEC starting from VI.
6956                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6957                           */
6958                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6959                                 amdgpu_fence_process(ring);
6960                 }
6961                 break;
6962         }
6963         return 0;
6964 }
6965
/* IRQ handler for privileged register access faults raised by the CP;
 * logs the violation and queues a full GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        /* Recovery requires a reset; defer it to process context. */
        schedule_work(&adev->reset_work);
        return 0;
}
6974
/* IRQ handler for illegal instruction faults raised by the CP;
 * logs the violation and queues a full GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        /* Recovery requires a reset; defer it to process context. */
        schedule_work(&adev->reset_work);
        return 0;
}
6983
6984 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6985                                             struct amdgpu_irq_src *src,
6986                                             unsigned int type,
6987                                             enum amdgpu_interrupt_state state)
6988 {
6989         uint32_t tmp, target;
6990         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6991
6992         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
6993
6994         if (ring->me == 1)
6995                 target = mmCP_ME1_PIPE0_INT_CNTL;
6996         else
6997                 target = mmCP_ME2_PIPE0_INT_CNTL;
6998         target += ring->pipe;
6999
7000         switch (type) {
7001         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7002                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
7003                         tmp = RREG32(mmCPC_INT_CNTL);
7004                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
7005                                                  GENERIC2_INT_ENABLE, 0);
7006                         WREG32(mmCPC_INT_CNTL, tmp);
7007
7008                         tmp = RREG32(target);
7009                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
7010                                                  GENERIC2_INT_ENABLE, 0);
7011                         WREG32(target, tmp);
7012                 } else {
7013                         tmp = RREG32(mmCPC_INT_CNTL);
7014                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
7015                                                  GENERIC2_INT_ENABLE, 1);
7016                         WREG32(mmCPC_INT_CNTL, tmp);
7017
7018                         tmp = RREG32(target);
7019                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
7020                                                  GENERIC2_INT_ENABLE, 1);
7021                         WREG32(target, tmp);
7022                 }
7023                 break;
7024         default:
7025                 BUG(); /* kiq only support GENERIC2_INT now */
7026                 break;
7027         }
7028         return 0;
7029 }
7030
7031 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7032                             struct amdgpu_irq_src *source,
7033                             struct amdgpu_iv_entry *entry)
7034 {
7035         u8 me_id, pipe_id, queue_id;
7036         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7037
7038         BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
7039
7040         me_id = (entry->ring_id & 0x0c) >> 2;
7041         pipe_id = (entry->ring_id & 0x03) >> 0;
7042         queue_id = (entry->ring_id & 0x70) >> 4;
7043         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7044                    me_id, pipe_id, queue_id);
7045
7046         amdgpu_fence_process(ring);
7047         return 0;
7048 }
7049
/* IP-block lifecycle callbacks for the GFX 8.0 engine, wired into the
 * amdgpu device init/teardown, suspend/resume and reset machinery. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7070
/* Ring callbacks for the graphics (GFX) ring: command emission helpers,
 * pointer accessors and self-tests used by the shared amdgpu ring code. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        /* Worst-case dword count reserved per frame; each term below is the
         * size of one packet the gfx ring may emit per submission. */
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                19 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
7114
/* Ring callbacks for the compute (KCQ) rings; shares most emit helpers
 * with the gfx ring but uses compute-specific wptr and fence handling. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        /* Worst-case dword count reserved per frame on a compute ring. */
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
};
7143
/* Ring callbacks for the Kernel Interface Queue (KIQ): like a compute
 * ring, but with a KIQ-specific fence and register read/write emission
 * used to drive the MEC on behalf of the kernel. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        /* Worst-case dword count reserved per frame on the KIQ ring. */
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7171
7172 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7173 {
7174         int i;
7175
7176         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7177
7178         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7179                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7180
7181         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7182                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7183 }
7184
/* End-of-pipe interrupt: state setter plus fence-processing handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};

/* Privileged register fault interrupt: triggers a GPU reset on receipt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};

/* Illegal instruction fault interrupt: triggers a GPU reset on receipt. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2 interrupt: used for fence signalling on the KIQ ring. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};
7204
/* Register the GFX interrupt sources (EOP, privileged-register fault,
 * illegal-instruction fault and KIQ) with the amdgpu IRQ framework. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        /* One EOP type per ring source (gfx + each MEC pipe). */
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7219
/* Hook up the RLC (RunList Controller) helper callbacks; all GFX 8
 * variants in this file share the iceland table. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7224
/* Initialize the Global Data Share (GDS) partition sizes from the total
 * GDS memory size reported by hardware; GWS and OA totals are fixed. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init asic gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        /* Partition sizes depend on whether the part exposes 64 KiB of
         * GDS memory or a larger pool. */
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7252
7253 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7254                                                  u32 bitmap)
7255 {
7256         u32 data;
7257
7258         if (!bitmap)
7259                 return;
7260
7261         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7262         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7263
7264         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7265 }
7266
7267 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7268 {
7269         u32 data, mask;
7270
7271         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7272                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7273
7274         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7275
7276         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7277 }
7278
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, and the "always on" CU mask (up to two CUs per SH). */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];

        memset(cu_info, 0, sizeof(*cu_info));

        /* Parse user-specified CU disable masks for up to 4 SEs x 2 SHs. */
        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        /* Register reads below are banked per SE/SH, so the GRBM index
         * must stay selected for the duration of each iteration. */
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        /* Apply user disables only where a mask was parsed. */
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first two in each SH are
                         * marked "always on". */
                        for (k = 0; k < 16; k ++) {
                                if (bitmap & mask) {
                                        if (counter < 2)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                }
        }
        /* Restore broadcast mode so later register accesses hit all SE/SH. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
}
7321
/* IP block descriptor for GFX 8.0 ASICs, exported for the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7330
/* IP block descriptor for GFX 8.1 ASICs; shares all callbacks with 8.0. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7339
/* Emit a WRITE_DATA packet that zero-initializes the CE metadata
 * (ce_payload) area inside the context save area (CSA) at @csa_addr.
 * The payload layout and size depend on whether chained IBs are
 * supported under virtualization. */
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
        uint64_t ce_payload_addr;
        int cnt_ce;
        /* NOTE(review): a static, zero-initialized scratch union is used as
         * the write source; assumes ring emission is serialized so no
         * concurrent emitter touches it — TODO confirm locking. */
        static union {
                struct vi_ce_ib_state regular;
                struct vi_ce_ib_state_chained_ib chained;
        } ce_payload = {};

        if (ring->adev->virt.chained_ib_support) {
                ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
                /* payload dwords + 4 packet-header dwords - 2 (count field
                 * excludes the first two dwords of the packet) */
                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
        } else {
                ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
        }

        /* WRITE_DATA targeting the CE engine (engine_sel 2), destination
         * memory (dst_sel 8), with write confirmation. */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7366
/* Emit a WRITE_DATA packet that initializes the DE metadata (de_payload)
 * area inside the context save area (CSA) at @csa_addr, recording the
 * GDS backup address (CSA + 4 KiB) in the payload. Layout and size depend
 * on chained-IB support under virtualization. */
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
        uint64_t de_payload_addr, gds_addr;
        int cnt_de;
        /* NOTE(review): static scratch union whose fields are overwritten on
         * every call before being copied to the ring; assumes emission is
         * serialized — TODO confirm no concurrent emitters share it. */
        static union {
                struct vi_de_ib_state regular;
                struct vi_de_ib_state_chained_ib chained;
        } de_payload = {};

        gds_addr = csa_addr + 4096;
        if (ring->adev->virt.chained_ib_support) {
                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
                /* payload dwords + 4 packet-header dwords - 2 (count field
                 * excludes the first two dwords of the packet) */
                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
        } else {
                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
        }

        /* WRITE_DATA targeting the DE engine (engine_sel 1), destination
         * memory (dst_sel 8), with write confirmation. */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
7398
7399 /* create MQD for each compute queue */
7400 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7401 {
7402         struct amdgpu_ring *ring = NULL;
7403         int r, i;
7404
7405         /* create MQD for KIQ */
7406         ring = &adev->gfx.kiq.ring;
7407         if (!ring->mqd_obj) {
7408                 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7409                                             AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7410                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
7411                 if (r) {
7412                         dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7413                         return r;
7414                 }
7415
7416                 /* prepare MQD backup */
7417                 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7418                 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7419                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7420         }
7421
7422         /* create MQD for each KCQ */
7423         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7424                 ring = &adev->gfx.compute_ring[i];
7425                 if (!ring->mqd_obj) {
7426                         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7427                                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7428                                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
7429                         if (r) {
7430                                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7431                                 return r;
7432                         }
7433
7434                         /* prepare MQD backup */
7435                         adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7436                         if (!adev->gfx.mec.mqd_backup[i])
7437                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7438                 }
7439         }
7440
7441         return 0;
7442 }
7443
7444 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7445 {
7446         struct amdgpu_ring *ring = NULL;
7447         int i;
7448
7449         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7450                 ring = &adev->gfx.compute_ring[i];
7451                 kfree(adev->gfx.mec.mqd_backup[i]);
7452                 amdgpu_bo_free_kernel(&ring->mqd_obj,
7453                                       &ring->mqd_gpu_addr,
7454                                       &ring->mqd_ptr);
7455         }
7456
7457         ring = &adev->gfx.kiq.ring;
7458         kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7459         amdgpu_bo_free_kernel(&ring->mqd_obj,
7460                               &ring->mqd_gpu_addr,
7461                               &ring->mqd_ptr);
7462 }