2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
28 #include "vi_structs.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
/* Ring topology for GFX v8: one graphics ring and eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODEn / GB_MACROTILE_MODEn register values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC_CGTT_MGCG_OVERRIDE bit masks (per-block clockgating override). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK		0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK		0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK		0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK		0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK		0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK		0x00000020L
/* Commands for the BPM serdes interface (set vs. clear a BPM register). */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,		/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,		/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,		/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,		/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,		/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX		/* NOTE(review): terminator reconstructed to match upstream gfx_v8_0.c — confirm */
};

/* Length of the RLC "direct register list" format header. */
#define RLC_FormatDirectRegListLength        14
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
149 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
151 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
152 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
153 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
154 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
155 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
156 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
157 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
158 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
159 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
160 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
161 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
162 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
163 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
164 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
165 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
166 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
169 static const u32 golden_settings_tonga_a11[] =
171 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
172 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
173 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
174 mmGB_GPU_ID, 0x0000000f, 0x00000000,
175 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
176 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
177 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
178 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
179 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
180 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
181 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
182 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
183 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
184 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
185 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
186 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
189 static const u32 tonga_golden_common_all[] =
191 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
192 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
193 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
194 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
195 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
196 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
197 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
198 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
201 static const u32 tonga_mgcg_cgcg_init[] =
203 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
204 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
205 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
207 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
208 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
209 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
210 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
211 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
212 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
213 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
214 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
215 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
216 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
217 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
218 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
219 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
220 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
221 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
222 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
223 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
224 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
225 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
226 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
227 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
228 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
229 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
230 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
231 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
232 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
233 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
234 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
237 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
242 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
245 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
246 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
247 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
248 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
249 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
252 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
257 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
262 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
267 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
272 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
275 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
276 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
277 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
280 static const u32 golden_settings_polaris11_a11[] =
282 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
283 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
284 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
285 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
286 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
287 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
288 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
289 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
290 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
291 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
292 mmSQ_CONFIG, 0x07f80000, 0x01180000,
293 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
294 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
295 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
296 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
297 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
298 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
301 static const u32 polaris11_golden_common_all[] =
303 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
304 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
305 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
306 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
307 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
308 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
311 static const u32 golden_settings_polaris10_a11[] =
313 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
314 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
315 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
316 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
317 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
318 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
319 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
320 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
321 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
322 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
323 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
324 mmSQ_CONFIG, 0x07f80000, 0x07180000,
325 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
326 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
327 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
328 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
329 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
332 static const u32 polaris10_golden_common_all[] =
334 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
335 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
336 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
337 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
338 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
339 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
340 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
341 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
344 static const u32 fiji_golden_common_all[] =
346 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
347 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
348 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
349 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
350 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
351 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
352 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
353 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
354 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
355 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
358 static const u32 golden_settings_fiji_a10[] =
360 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
361 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
363 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
364 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
365 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
366 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
367 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
368 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
369 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
370 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
373 static const u32 fiji_mgcg_cgcg_init[] =
375 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
376 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
377 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
378 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
379 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
380 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
381 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
382 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
384 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
385 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
386 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
387 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
388 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
389 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
390 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
391 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
392 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
393 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
394 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
395 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
396 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
397 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
398 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
399 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
400 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
401 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
402 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
403 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
404 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
405 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
407 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
408 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
409 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
412 static const u32 golden_settings_iceland_a11[] =
414 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
415 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
416 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
417 mmGB_GPU_ID, 0x0000000f, 0x00000000,
418 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
419 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
420 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
421 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
422 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
423 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
428 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
429 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
432 static const u32 iceland_golden_common_all[] =
434 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
436 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
437 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
438 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
439 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
440 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
441 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
444 static const u32 iceland_mgcg_cgcg_init[] =
446 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
447 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
448 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
449 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
450 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
451 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
452 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
453 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
456 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
457 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
458 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
459 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
460 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
462 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
463 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
464 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
465 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
466 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
467 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
468 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
469 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
470 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
471 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
472 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
473 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
475 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
476 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
477 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
478 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
479 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
480 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
481 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
482 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
483 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
484 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
485 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
486 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
487 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
488 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
489 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
490 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
491 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
492 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
495 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
500 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
505 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
508 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
509 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
512 static const u32 cz_golden_settings_a11[] =
514 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
515 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
516 mmGB_GPU_ID, 0x0000000f, 0x00000000,
517 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
518 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
519 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
520 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
521 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
522 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
523 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
524 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
525 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
528 static const u32 cz_golden_common_all[] =
530 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
532 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
533 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
534 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
535 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
536 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
537 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
540 static const u32 cz_mgcg_cgcg_init[] =
542 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
543 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
544 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
545 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
546 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
547 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
548 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
550 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
551 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
552 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
553 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
554 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
555 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
556 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
557 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
558 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
559 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
560 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
561 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
562 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
563 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
564 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
565 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
566 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
567 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
568 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
569 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
570 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
571 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
572 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
573 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
574 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
575 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
576 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
577 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
578 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
579 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
580 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
581 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
582 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
583 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
584 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
585 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
586 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
587 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
588 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
591 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
596 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
601 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
606 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
611 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
614 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
615 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
616 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
619 static const u32 stoney_golden_settings_a11[] =
621 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
622 mmGB_GPU_ID, 0x0000000f, 0x00000000,
623 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
624 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
625 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
626 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
627 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
628 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
629 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
630 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
633 static const u32 stoney_golden_common_all[] =
635 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
636 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
637 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
638 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
639 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
640 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
641 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
642 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
645 static const u32 stoney_mgcg_cgcg_init[] =
647 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
648 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
649 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
650 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
651 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
661 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
662 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
663 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
665 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
667 switch (adev->asic_type) {
669 amdgpu_program_register_sequence(adev,
670 iceland_mgcg_cgcg_init,
671 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
672 amdgpu_program_register_sequence(adev,
673 golden_settings_iceland_a11,
674 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
675 amdgpu_program_register_sequence(adev,
676 iceland_golden_common_all,
677 (const u32)ARRAY_SIZE(iceland_golden_common_all));
680 amdgpu_program_register_sequence(adev,
682 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
683 amdgpu_program_register_sequence(adev,
684 golden_settings_fiji_a10,
685 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
686 amdgpu_program_register_sequence(adev,
687 fiji_golden_common_all,
688 (const u32)ARRAY_SIZE(fiji_golden_common_all));
692 amdgpu_program_register_sequence(adev,
693 tonga_mgcg_cgcg_init,
694 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
695 amdgpu_program_register_sequence(adev,
696 golden_settings_tonga_a11,
697 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
698 amdgpu_program_register_sequence(adev,
699 tonga_golden_common_all,
700 (const u32)ARRAY_SIZE(tonga_golden_common_all));
704 amdgpu_program_register_sequence(adev,
705 golden_settings_polaris11_a11,
706 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
707 amdgpu_program_register_sequence(adev,
708 polaris11_golden_common_all,
709 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
712 amdgpu_program_register_sequence(adev,
713 golden_settings_polaris10_a11,
714 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
715 amdgpu_program_register_sequence(adev,
716 polaris10_golden_common_all,
717 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
718 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
719 if (adev->pdev->revision == 0xc7 &&
720 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
721 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
722 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
723 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
724 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
728 amdgpu_program_register_sequence(adev,
730 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
731 amdgpu_program_register_sequence(adev,
732 cz_golden_settings_a11,
733 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
734 amdgpu_program_register_sequence(adev,
735 cz_golden_common_all,
736 (const u32)ARRAY_SIZE(cz_golden_common_all));
739 amdgpu_program_register_sequence(adev,
740 stoney_mgcg_cgcg_init,
741 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
742 amdgpu_program_register_sequence(adev,
743 stoney_golden_settings_a11,
744 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
745 amdgpu_program_register_sequence(adev,
746 stoney_golden_common_all,
747 (const u32)ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - set up the driver-managed pool of CP scratch
 * registers (used by the ring/IB tests below via amdgpu_gfx_scratch_get).
 */
754 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
756 adev->gfx.scratch.num_reg = 7;
757 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
/* one "free" bit per scratch register */
758 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a ring.
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet that writes 0xDEADBEEF to the same register, then polls the
 * register (up to adev->usec_timeout iterations) until the new value
 * lands.  Success/failure is reported via DRM_INFO/DRM_ERROR; the
 * scratch register is returned to the pool on all paths.
 */
761 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
763 struct amdgpu_device *adev = ring->adev;
769 r = amdgpu_gfx_scratch_get(adev, &scratch);
771 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
/* sentinel value so we can tell when the CP has written the register */
774 WREG32(scratch, 0xCAFEDEAD);
/* the test packet below is exactly 3 dwords */
775 r = amdgpu_ring_alloc(ring, 3);
777 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
779 amdgpu_gfx_scratch_free(adev, scratch);
/* SET_UCONFIG_REG: register offset is relative to the UCONFIG base */
782 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
783 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
784 amdgpu_ring_write(ring, 0xDEADBEEF);
785 amdgpu_ring_commit(ring);
/* busy-poll for the CP to process the packet */
787 for (i = 0; i < adev->usec_timeout; i++) {
788 tmp = RREG32(scratch);
789 if (tmp == 0xDEADBEEF)
/* i < usec_timeout means the poll loop broke out early, i.e. success */
793 if (i < adev->usec_timeout) {
794 DRM_INFO("ring test on %d succeeded in %d usecs\n",
797 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
798 ring->idx, scratch, tmp);
801 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer submission.
 *
 * Same idea as gfx_v8_0_ring_test_ring(), but the SET_UCONFIG_REG write
 * of 0xDEADBEEF is placed in an IB which is scheduled on the ring; the
 * function then waits (with @timeout) on the returned fence before
 * checking the scratch register.  IB and scratch register are released
 * on all paths.
 */
805 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
807 struct amdgpu_device *adev = ring->adev;
809 struct dma_fence *f = NULL;
814 r = amdgpu_gfx_scratch_get(adev, &scratch);
816 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
/* sentinel so we can detect the IB's register write */
819 WREG32(scratch, 0xCAFEDEAD);
820 memset(&ib, 0, sizeof(ib));
821 r = amdgpu_ib_get(adev, NULL, 256, &ib);
823 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
/* 3-dword SET_UCONFIG_REG packet, same payload as the ring test */
826 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
827 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
828 ib.ptr[2] = 0xDEADBEEF;
831 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
/* dma_fence_wait_timeout() returns 0 on timeout, <0 on error */
835 r = dma_fence_wait_timeout(f, false, timeout);
837 DRM_ERROR("amdgpu: IB test timed out.\n");
841 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
844 tmp = RREG32(scratch);
845 if (tmp == 0xDEADBEEF) {
846 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
849 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
854 amdgpu_ib_free(adev, &ib, NULL);
857 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_free_microcode - drop every GFX firmware image requested by
 * gfx_v8_0_init_microcode() and free the RLC register-list buffer
 * allocated there.
 */
862 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
863 release_firmware(adev->gfx.pfp_fw);
864 adev->gfx.pfp_fw = NULL;
865 release_firmware(adev->gfx.me_fw);
866 adev->gfx.me_fw = NULL;
867 release_firmware(adev->gfx.ce_fw);
868 adev->gfx.ce_fw = NULL;
869 release_firmware(adev->gfx.rlc_fw);
870 adev->gfx.rlc_fw = NULL;
871 release_firmware(adev->gfx.mec_fw);
872 adev->gfx.mec_fw = NULL;
/* Stoney/Topaz never load a MEC2 image (see gfx_v8_0_init_microcode) */
873 if ((adev->asic_type != CHIP_STONEY) &&
874 (adev->asic_type != CHIP_TOPAZ))
875 release_firmware(adev->gfx.mec2_fw);
876 adev->gfx.mec2_fw = NULL;
/* covers both the format list and the restore list (single kmalloc) */
878 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware.
 *
 * Loads PFP, ME, CE, RLC, MEC (and, where supported, MEC2) images named
 * "amdgpu/<chip>_<block>.bin", caches version/feature numbers from the
 * firmware headers, parses the RLC register save/restore lists, and —
 * when the SMU performs firmware loading — fills in the shared ucode
 * table and accumulated fw_size.  On error, every image acquired so far
 * is released (release_firmware(NULL) is a no-op).
 */
881 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
883 const char *chip_name;
886 struct amdgpu_firmware_info *info = NULL;
887 const struct common_firmware_header *header = NULL;
888 const struct gfx_firmware_header_v1_0 *cp_hdr;
889 const struct rlc_firmware_header_v2_0 *rlc_hdr;
890 unsigned int *tmp = NULL, i;
/* pick the firmware name prefix for this ASIC */
894 switch (adev->asic_type) {
902 chip_name = "carrizo";
908 chip_name = "polaris11";
911 chip_name = "polaris10";
914 chip_name = "polaris12";
917 chip_name = "stoney";
/* PFP (prefetch parser) */
923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
924 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
927 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
931 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) */
934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
935 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
938 err = amdgpu_ucode_validate(adev->gfx.me_fw);
941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
942 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944 /* chain ib ucode isn't formally released, so disable it for now.
945 * TODO: when the ucode is ready we should use the ucode version to
946 * judge whether chain-ib is supported or not.
948 adev->virt.chained_ib_support = false;
950 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) */
952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
953 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
956 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
959 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
960 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
961 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* RLC (run list controller) — header v2.0 carries save/restore lists */
963 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
964 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
967 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
968 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
969 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
970 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* cache the RLC save/restore list metadata from the firmware header */
972 adev->gfx.rlc.save_and_restore_offset =
973 le32_to_cpu(rlc_hdr->save_and_restore_offset);
974 adev->gfx.rlc.clear_state_descriptor_offset =
975 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
976 adev->gfx.rlc.avail_scratch_ram_locations =
977 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
978 adev->gfx.rlc.reg_restore_list_size =
979 le32_to_cpu(rlc_hdr->reg_restore_list_size);
980 adev->gfx.rlc.reg_list_format_start =
981 le32_to_cpu(rlc_hdr->reg_list_format_start);
982 adev->gfx.rlc.reg_list_format_separate_start =
983 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
984 adev->gfx.rlc.starting_offsets_start =
985 le32_to_cpu(rlc_hdr->starting_offsets_start);
986 adev->gfx.rlc.reg_list_format_size_bytes =
987 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
988 adev->gfx.rlc.reg_list_size_bytes =
989 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* single allocation holding the format list followed by the restore list;
 * freed in gfx_v8_0_free_microcode() */
991 adev->gfx.rlc.register_list_format =
992 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
993 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
995 if (!adev->gfx.rlc.register_list_format) {
/* copy the format list, converting dwords to host endianness */
1000 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1001 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1002 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1003 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
/* restore list lives immediately after the format list (i == count) */
1005 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1007 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1008 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1009 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1010 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (micro engine compute) pipe 1 */
1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1013 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1016 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1019 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1020 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1021 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 exists on everything except Stoney and Topaz */
1023 if ((adev->asic_type != CHIP_STONEY) &&
1024 (adev->asic_type != CHIP_TOPAZ)) {
1025 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1026 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1028 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1031 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1032 adev->gfx.mec2_fw->data;
1033 adev->gfx.mec2_fw_version =
1034 le32_to_cpu(cp_hdr->header.ucode_version);
1035 adev->gfx.mec2_feature_version =
1036 le32_to_cpu(cp_hdr->ucode_feature_version);
1039 adev->gfx.mec2_fw = NULL;
/* SMU-driven firmware loading: publish every image in the ucode table
 * and accumulate the total (page-aligned) firmware footprint */
1043 if (adev->firmware.smu_load) {
1044 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1045 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1046 info->fw = adev->gfx.pfp_fw;
1047 header = (const struct common_firmware_header *)info->fw->data;
1048 adev->firmware.fw_size +=
1049 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1052 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1053 info->fw = adev->gfx.me_fw;
1054 header = (const struct common_firmware_header *)info->fw->data;
1055 adev->firmware.fw_size +=
1056 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1059 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1060 info->fw = adev->gfx.ce_fw;
1061 header = (const struct common_firmware_header *)info->fw->data;
1062 adev->firmware.fw_size +=
1063 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1066 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1067 info->fw = adev->gfx.rlc_fw;
1068 header = (const struct common_firmware_header *)info->fw->data;
1069 adev->firmware.fw_size +=
1070 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1073 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1074 info->fw = adev->gfx.mec_fw;
1075 header = (const struct common_firmware_header *)info->fw->data;
1076 adev->firmware.fw_size +=
1077 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1079 /* we need account JT in */
1080 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
/* jt_size is in dwords, hence the << 2 */
1081 adev->firmware.fw_size +=
1082 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
/* SR-IOV VFs also reserve a storage slot backed by the MEC image */
1084 if (amdgpu_sriov_vf(adev)) {
1085 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1086 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1087 info->fw = adev->gfx.mec_fw;
/* NOTE(review): le32_to_cpu() on the host constant 64 * PAGE_SIZE looks
 * wrong — the apparent intent is a plain 64-page reservation; confirm */
1088 adev->firmware.fw_size +=
1089 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1092 if (adev->gfx.mec2_fw) {
1093 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1094 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1095 info->fw = adev->gfx.mec2_fw;
1096 header = (const struct common_firmware_header *)info->fw->data;
1097 adev->firmware.fw_size +=
1098 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* error path: release everything acquired so far */
1106 "gfx8: Failed to load firmware \"%s\"\n",
1108 release_firmware(adev->gfx.pfp_fw);
1109 adev->gfx.pfp_fw = NULL;
1110 release_firmware(adev->gfx.me_fw);
1111 adev->gfx.me_fw = NULL;
1112 release_firmware(adev->gfx.ce_fw);
1113 adev->gfx.ce_fw = NULL;
1114 release_firmware(adev->gfx.rlc_fw);
1115 adev->gfx.rlc_fw = NULL;
1116 release_firmware(adev->gfx.mec_fw);
1117 adev->gfx.mec_fw = NULL;
1118 release_firmware(adev->gfx.mec2_fw);
1119 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - serialize the RLC clear-state buffer (CSB)
 * into @buffer as little-endian PM4 packets: preamble begin, context
 * control, one SET_CONTEXT_REG packet per extent of every SECT_CONTEXT
 * section in adev->gfx.rlc.cs_data, the raster config pair, preamble
 * end, and a final CLEAR_STATE.
 */
1124 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1125 volatile u32 *buffer)
1128 const struct cs_section_def *sect = NULL;
1129 const struct cs_extent_def *ext = NULL;
/* nothing to emit without clear-state data */
1131 if (adev->gfx.rlc.cs_data == NULL)
1136 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1137 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1139 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1140 buffer[count++] = cpu_to_le32(0x80000000);
1141 buffer[count++] = cpu_to_le32(0x80000000);
/* emit every context-register extent from the clear-state tables */
1143 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1144 for (ext = sect->section; ext->extent != NULL; ++ext) {
1145 if (sect->id == SECT_CONTEXT) {
1147 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1148 buffer[count++] = cpu_to_le32(ext->reg_index -
1149 PACKET3_SET_CONTEXT_REG_START);
1150 for (i = 0; i < ext->reg_count; i++)
1151 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 from the cached RB config */
1158 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1159 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1160 PACKET3_SET_CONTEXT_REG_START);
1161 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1162 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1164 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1165 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1167 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1168 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp table
 * buffer (Carrizo/Stoney).
 *
 * For each micro engine (me 0..max_me-1: CE, PFP, ME, MEC, MEC2) the
 * jump table is located inside the corresponding firmware image via the
 * header's jt_offset/jt_size and copied, dword by dword, to consecutive
 * offsets in adev->gfx.rlc.cp_table_ptr.
 */
1171 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1173 const __le32 *fw_data;
1174 volatile u32 *dst_ptr;
1175 int me, i, max_me = 4;
1177 u32 table_offset, table_size;
/* NOTE(review): the me == 4 (MEC2) branch below is only reachable if
 * this CHIP_CARRIZO branch raises max_me (body not visible here) —
 * confirm against the full source */
1179 if (adev->asic_type == CHIP_CARRIZO)
1182 /* write the cp table buffer */
1183 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1184 for (me = 0; me < max_me; me++) {
1186 const struct gfx_firmware_header_v1_0 *hdr =
1187 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1188 fw_data = (const __le32 *)
1189 (adev->gfx.ce_fw->data +
1190 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191 table_offset = le32_to_cpu(hdr->jt_offset);
1192 table_size = le32_to_cpu(hdr->jt_size);
1193 } else if (me == 1) {
1194 const struct gfx_firmware_header_v1_0 *hdr =
1195 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1196 fw_data = (const __le32 *)
1197 (adev->gfx.pfp_fw->data +
1198 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199 table_offset = le32_to_cpu(hdr->jt_offset);
1200 table_size = le32_to_cpu(hdr->jt_size);
1201 } else if (me == 2) {
1202 const struct gfx_firmware_header_v1_0 *hdr =
1203 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1204 fw_data = (const __le32 *)
1205 (adev->gfx.me_fw->data +
1206 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207 table_offset = le32_to_cpu(hdr->jt_offset);
1208 table_size = le32_to_cpu(hdr->jt_size);
1209 } else if (me == 3) {
1210 const struct gfx_firmware_header_v1_0 *hdr =
1211 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212 fw_data = (const __le32 *)
1213 (adev->gfx.mec_fw->data +
1214 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215 table_offset = le32_to_cpu(hdr->jt_offset);
1216 table_size = le32_to_cpu(hdr->jt_size);
1217 } else if (me == 4) {
1218 const struct gfx_firmware_header_v1_0 *hdr =
1219 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1220 fw_data = (const __le32 *)
1221 (adev->gfx.mec2_fw->data +
1222 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1223 table_offset = le32_to_cpu(hdr->jt_offset);
1224 table_size = le32_to_cpu(hdr->jt_size);
/* dword-wise copy; swab to LE for the GPU-visible buffer */
1227 for (i = 0; i < table_size; i ++) {
1228 dst_ptr[bo_offset + i] =
1229 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
/* tables for successive engines are packed back to back */
1232 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects created by
 * gfx_v8_0_rlc_init(): the clear-state BO and the CP jump table BO.
 * Each is reserved, unpinned, unreserved and unreferenced; a failed
 * reserve is only warned about.
 */
1236 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1240 /* clear state block */
1241 if (adev->gfx.rlc.clear_state_obj) {
1242 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1243 if (unlikely(r != 0))
1244 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1245 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1246 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1247 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1248 adev->gfx.rlc.clear_state_obj = NULL;
1251 /* jump table block */
1252 if (adev->gfx.rlc.cp_table_obj) {
1253 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1254 if (unlikely(r != 0))
1255 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1256 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1257 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1258 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1259 adev->gfx.rlc.cp_table_obj = NULL;
/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC buffers.
 *
 * Creates a VRAM BO for the clear-state buffer (sized via
 * gfx_v8_0_get_csb_size), pins and maps it, writes the CSB with
 * gfx_v8_0_get_csb_buffer(), and on Carrizo/Stoney additionally creates
 * the CP jump-table BO and fills it via cz_init_cp_jump_table().
 * gfx_v8_0_rlc_fini() is called on the error paths to undo partial
 * setup.
 */
1263 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1265 volatile u32 *dst_ptr;
1267 const struct cs_section_def *cs_data;
1270 adev->gfx.rlc.cs_data = vi_cs_data;
1272 cs_data = adev->gfx.rlc.cs_data;
1275 /* clear state block */
1276 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1278 if (adev->gfx.rlc.clear_state_obj == NULL) {
/* CPU-accessible, physically contiguous VRAM; size is in dwords */
1279 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1280 AMDGPU_GEM_DOMAIN_VRAM,
1281 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1282 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1284 &adev->gfx.rlc.clear_state_obj);
1286 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1287 gfx_v8_0_rlc_fini(adev);
1291 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1292 if (unlikely(r != 0)) {
1293 gfx_v8_0_rlc_fini(adev);
1296 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1297 &adev->gfx.rlc.clear_state_gpu_addr);
1299 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1300 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1301 gfx_v8_0_rlc_fini(adev);
1305 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1307 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1308 gfx_v8_0_rlc_fini(adev);
1311 /* set up the cs buffer */
1312 dst_ptr = adev->gfx.rlc.cs_ptr;
1313 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1314 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1315 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* CP jump table is only needed on Carrizo/Stoney */
1318 if ((adev->asic_type == CHIP_CARRIZO) ||
1319 (adev->asic_type == CHIP_STONEY)) {
1320 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1321 if (adev->gfx.rlc.cp_table_obj == NULL) {
1322 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1323 AMDGPU_GEM_DOMAIN_VRAM,
1324 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1325 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1327 &adev->gfx.rlc.cp_table_obj);
1329 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1334 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1335 if (unlikely(r != 0)) {
1336 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1339 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1340 &adev->gfx.rlc.cp_table_gpu_addr);
1342 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1343 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1346 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1348 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1352 cz_init_cp_jump_table(adev);
1354 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/*
 * gfx_v8_0_mec_fini - release the MEC HPD EOP buffer object created by
 * gfx_v8_0_mec_init(); a failed reserve is only warned about.
 */
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1365 if (adev->gfx.mec.hpd_eop_obj) {
1366 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1367 if (unlikely(r != 0))
1368 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1369 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1370 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1371 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1372 adev->gfx.mec.hpd_eop_obj = NULL;
/*
 * gfx_v8_0_kiq_init_ring - set up the KIQ (kernel interface queue) ring:
 * grabs a writeback slot for register-read results, configures the ring
 * to use the KIQ doorbell, and initializes it with its own IRQ source.
 */
1376 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377 struct amdgpu_ring *ring,
1378 struct amdgpu_irq_src *irq)
/* writeback slot used to return register values read through the KIQ */
1382 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1387 ring->ring_obj = NULL;
1388 ring->use_doorbell = true;
1389 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390 if (adev->gfx.mec2_fw) {
1399 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1400 r = amdgpu_ring_init(adev, ring, 1024,
1401 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1403 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
/*
 * gfx_v8_0_kiq_free_ring - undo gfx_v8_0_kiq_init_ring(): return the
 * writeback slot and tear down the ring.
 */
1407 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1408 struct amdgpu_irq_src *irq)
1410 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs)
1411 amdgpu_ring_fini(ring);
1414 #define MEC_HPD_SIZE 2048
/*
 * gfx_v8_0_mec_init - allocate and zero the HPD EOP buffer for the MEC
 * compute queues (one MEC_HPD_SIZE slot per queue) in a pinned,
 * CPU-mapped GTT BO.  Cleans up via gfx_v8_0_mec_fini() on failure.
 */
1416 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1422 * we assign only 1 pipe because all other pipes will
1425 adev->gfx.mec.num_mec = 1;
1426 adev->gfx.mec.num_pipe = 1;
/* 8 queues per pipe */
1427 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1429 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1430 r = amdgpu_bo_create(adev,
1431 adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1433 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1434 &adev->gfx.mec.hpd_eop_obj);
1436 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1441 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1442 if (unlikely(r != 0)) {
1443 gfx_v8_0_mec_fini(adev);
1446 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1447 &adev->gfx.mec.hpd_eop_gpu_addr);
1449 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1450 gfx_v8_0_mec_fini(adev);
1453 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1455 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1456 gfx_v8_0_mec_fini(adev);
/* HPD slots must start out zeroed */
1460 memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1462 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1463 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * gfx_v8_0_kiq_fini - free the KIQ EOP buffer created by
 * gfx_v8_0_kiq_init().
 */
1468 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1470 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1472 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
/*
 * gfx_v8_0_kiq_init - allocate, map and zero a single MEC_HPD_SIZE EOP
 * buffer for the KIQ in GTT (created/pinned/mapped in one call via
 * amdgpu_bo_create_kernel).
 */
1475 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1479 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1481 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1482 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1483 &kiq->eop_gpu_addr, (void **)&hpd);
1485 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1489 memset(hpd, 0, MEC_HPD_SIZE);
1491 amdgpu_bo_kunmap(kiq->eop_obj);
/*
 * Precompiled GCN compute shader machine code, copied verbatim into the
 * IB by gfx_v8_0_do_edc_gpr_workarounds() (at vgpr_offset) and
 * dispatched to initialize VGPRs as part of the Carrizo EDC GPR
 * workaround.  Do not edit the dwords by hand.
 */
1496 static const u32 vgpr_init_compute_shader[] =
1498 0x7e000209, 0x7e020208,
1499 0x7e040207, 0x7e060206,
1500 0x7e080205, 0x7e0a0204,
1501 0x7e0c0203, 0x7e0e0202,
1502 0x7e100201, 0x7e120200,
1503 0x7e140209, 0x7e160208,
1504 0x7e180207, 0x7e1a0206,
1505 0x7e1c0205, 0x7e1e0204,
1506 0x7e200203, 0x7e220202,
1507 0x7e240201, 0x7e260200,
1508 0x7e280209, 0x7e2a0208,
1509 0x7e2c0207, 0x7e2e0206,
1510 0x7e300205, 0x7e320204,
1511 0x7e340203, 0x7e360202,
1512 0x7e380201, 0x7e3a0200,
1513 0x7e3c0209, 0x7e3e0208,
1514 0x7e400207, 0x7e420206,
1515 0x7e440205, 0x7e460204,
1516 0x7e480203, 0x7e4a0202,
1517 0x7e4c0201, 0x7e4e0200,
1518 0x7e500209, 0x7e520208,
1519 0x7e540207, 0x7e560206,
1520 0x7e580205, 0x7e5a0204,
1521 0x7e5c0203, 0x7e5e0202,
1522 0x7e600201, 0x7e620200,
1523 0x7e640209, 0x7e660208,
1524 0x7e680207, 0x7e6a0206,
1525 0x7e6c0205, 0x7e6e0204,
1526 0x7e700203, 0x7e720202,
1527 0x7e740201, 0x7e760200,
1528 0x7e780209, 0x7e7a0208,
1529 0x7e7c0207, 0x7e7e0206,
1530 0xbf8a0000, 0xbf810000,
/*
 * Precompiled GCN compute shader machine code, copied into the IB by
 * gfx_v8_0_do_edc_gpr_workarounds() (at sgpr_offset) and dispatched
 * twice (once per SGPR init pass) as part of the Carrizo EDC GPR
 * workaround.  Do not edit the dwords by hand.
 */
1533 static const u32 sgpr_init_compute_shader[] =
1535 0xbe8a0100, 0xbe8c0102,
1536 0xbe8e0104, 0xbe900106,
1537 0xbe920108, 0xbe940100,
1538 0xbe960102, 0xbe980104,
1539 0xbe9a0106, 0xbe9c0108,
1540 0xbe9e0100, 0xbea00102,
1541 0xbea20104, 0xbea40106,
1542 0xbea60108, 0xbea80100,
1543 0xbeaa0102, 0xbeac0104,
1544 0xbeae0106, 0xbeb00108,
1545 0xbeb20100, 0xbeb40102,
1546 0xbeb60104, 0xbeb80106,
1547 0xbeba0108, 0xbebc0100,
1548 0xbebe0102, 0xbec00104,
1549 0xbec20106, 0xbec40108,
1550 0xbec60100, 0xbec80102,
1551 0xbee60004, 0xbee70005,
1552 0xbeea0006, 0xbeeb0007,
1553 0xbee80008, 0xbee90009,
1554 0xbefc0000, 0xbf8a0000,
1555 0xbf810000, 0x00000000,
/*
 * Register/value pairs (consumed two at a time via SET_SH_REG in
 * gfx_v8_0_do_edc_gpr_workarounds()) configuring the VGPR-init compute
 * dispatch for the Carrizo EDC GPR workaround.
 */
1558 static const u32 vgpr_init_regs[] =
1560 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1561 mmCOMPUTE_RESOURCE_LIMITS, 0,
1562 mmCOMPUTE_NUM_THREAD_X, 256*4,
1563 mmCOMPUTE_NUM_THREAD_Y, 1,
1564 mmCOMPUTE_NUM_THREAD_Z, 1,
1565 mmCOMPUTE_PGM_RSRC2, 20,
1566 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1567 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1568 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1569 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1570 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1571 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1572 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1573 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1574 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1575 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the first SGPR-init dispatch of the EDC GPR
 * workaround; differs from sgpr2_init_regs only in the SE0 thread mask
 * (0x0f here vs 0xf0 there).
 */
1578 static const u32 sgpr1_init_regs[] =
1580 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1581 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1582 mmCOMPUTE_NUM_THREAD_X, 256*5,
1583 mmCOMPUTE_NUM_THREAD_Y, 1,
1584 mmCOMPUTE_NUM_THREAD_Z, 1,
1585 mmCOMPUTE_PGM_RSRC2, 20,
1586 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1587 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1588 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1589 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1590 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1591 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1592 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1593 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1594 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1595 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the second SGPR-init dispatch of the EDC GPR
 * workaround; identical to sgpr1_init_regs except for the SE0 thread
 * mask (0xf0 here vs 0x0f there).
 */
1598 static const u32 sgpr2_init_regs[] =
1600 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1601 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1602 mmCOMPUTE_NUM_THREAD_X, 256*5,
1603 mmCOMPUTE_NUM_THREAD_Y, 1,
1604 mmCOMPUTE_NUM_THREAD_Z, 1,
1605 mmCOMPUTE_PGM_RSRC2, 20,
1606 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1607 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1608 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1609 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1610 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1611 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1612 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1613 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1614 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1615 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * EDC (SEC/DED) error counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each one back to clear its counter after enabling EDC.
 */
1618 static const u32 sec_ded_counter_registers[] =
1621 mmCPC_EDC_SCRATCH_CNT,
1622 mmCPC_EDC_UCODE_CNT,
1629 mmDC_EDC_CSINVOC_CNT,
1630 mmDC_EDC_RESTORE_CNT,
1636 mmSQC_ATC_EDC_GATCL1_CNT,
1642 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only EDC GPR init sequence.
 *
 * Builds one IB containing three compute dispatches — a VGPR-init pass
 * and two SGPR-init passes (same shader, complementary SE0 thread
 * masks) — each followed by a CS-partial-flush event, runs it on
 * compute ring 0 and waits for the fence.  Afterwards it enables EDC
 * (DED_MODE/PROP_FED in GB_EDC_MODE, DIS_EDC cleared in
 * CC_GC_EDC_CONFIG) and reads every SEC/DED counter register back to
 * clear it.
 */
1647 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1649 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1650 struct amdgpu_ib ib;
1651 struct dma_fence *f = NULL;
1654 unsigned total_size, vgpr_offset, sgpr_offset;
1657 /* only supported on CZ */
1658 if (adev->asic_type != CHIP_CARRIZO)
1661 /* bail if the compute ring is not ready */
/* save current EDC mode; disable EDC while the init shaders run */
1665 tmp = RREG32(mmGB_EDC_MODE);
1666 WREG32(mmGB_EDC_MODE, 0);
/* per dispatch: 3 dwords per reg pair + 4 (PGM_LO/HI) + 5 (dispatch)
 * + 2 (CS partial flush) dwords, converted to bytes */
1669 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1671 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1673 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
/* shader code goes after the packets, 256-byte aligned (COMPUTE_PGM_LO
 * takes a >> 8 address) */
1674 total_size = ALIGN(total_size, 256);
1675 vgpr_offset = total_size;
1676 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1677 sgpr_offset = total_size;
1678 total_size += sizeof(sgpr_init_compute_shader);
1680 /* allocate an indirect buffer to put the commands in */
1681 memset(&ib, 0, sizeof(ib));
1682 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1684 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1688 /* load the compute shaders */
1689 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1690 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1692 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1693 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1695 /* init the ib length to 0 */
/* ---- dispatch 1: VGPR init ---- */
1699 /* write the register state for the compute dispatch */
1700 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1701 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1702 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1703 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1705 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1706 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1708 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1709 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1710 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1712 /* write dispatch packet */
1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1714 ib.ptr[ib.length_dw++] = 8; /* x */
1715 ib.ptr[ib.length_dw++] = 1; /* y */
1716 ib.ptr[ib.length_dw++] = 1; /* z */
1717 ib.ptr[ib.length_dw++] =
1718 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1720 /* write CS partial flush packet */
1721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1722 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 2: SGPR init, first half of SEs ---- */
1725 /* write the register state for the compute dispatch */
1726 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1728 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1729 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1731 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1732 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1734 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1735 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1736 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1738 /* write dispatch packet */
1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1740 ib.ptr[ib.length_dw++] = 8; /* x */
1741 ib.ptr[ib.length_dw++] = 1; /* y */
1742 ib.ptr[ib.length_dw++] = 1; /* z */
1743 ib.ptr[ib.length_dw++] =
1744 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1746 /* write CS partial flush packet */
1747 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1748 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- dispatch 3: SGPR init, other half (same shader, 0xf0 mask) ---- */
1751 /* write the register state for the compute dispatch */
1752 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1754 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1755 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1757 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1758 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1760 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1761 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1762 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1764 /* write dispatch packet */
1765 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1766 ib.ptr[ib.length_dw++] = 8; /* x */
1767 ib.ptr[ib.length_dw++] = 1; /* y */
1768 ib.ptr[ib.length_dw++] = 1; /* z */
1769 ib.ptr[ib.length_dw++] =
1770 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1772 /* write CS partial flush packet */
1773 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1774 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1776 /* schedule the ib on the ring */
1777 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1779 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1783 /* wait for the GPU to finish processing the IB */
1784 r = dma_fence_wait(f, false);
1786 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* re-enable EDC with double-error detect and fault propagation */
1790 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1791 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1792 WREG32(mmGB_EDC_MODE, tmp);
1794 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1795 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1796 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1799 /* read back registers to clear the counters */
1800 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1801 RREG32(sec_ded_counter_registers[i]);
1804 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v8_0_gpu_early_init() - derive the per-ASIC graphics configuration.
 *
 * Fills adev->gfx.config with the shader-engine / tile-pipe / CU limits and
 * the scan-converter FIFO sizes for the detected ASIC, reads the memory
 * controller RAMCFG, computes the memory row size (from the fused DIMM
 * address-map registers on APUs, from NOOFCOLS on dGPUs) and patches the
 * ROW_SIZE field of the golden GB_ADDR_CONFIG value accordingly.
 *
 * NOTE(review): the switch case labels, break statements and some error
 * checks are not visible in this excerpt; branch identities noted below are
 * inferred from the golden GB_ADDR_CONFIG constant each branch selects —
 * confirm against the full file.
 */
1810 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1813 u32 mc_shared_chmap, mc_arb_ramcfg;
1814 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
/* Hard-coded topology limits per ASIC family. */
1818 switch (adev->asic_type) {
/* 1 SE, 2 pipes, 6 CUs/SH — Topaz/Iceland (selects TOPAZ golden config). */
1820 adev->gfx.config.max_shader_engines = 1;
1821 adev->gfx.config.max_tile_pipes = 2;
1822 adev->gfx.config.max_cu_per_sh = 6;
1823 adev->gfx.config.max_sh_per_se = 1;
1824 adev->gfx.config.max_backends_per_se = 2;
1825 adev->gfx.config.max_texture_channel_caches = 2;
1826 adev->gfx.config.max_gprs = 256;
1827 adev->gfx.config.max_gs_threads = 32;
1828 adev->gfx.config.max_hw_contexts = 8;
1830 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* Large dGPU: 4 SEs, 16 pipes, 16 CUs/SH (TONGA golden) — presumably Fiji. */
1837 adev->gfx.config.max_shader_engines = 4;
1838 adev->gfx.config.max_tile_pipes = 16;
1839 adev->gfx.config.max_cu_per_sh = 16;
1840 adev->gfx.config.max_sh_per_se = 1;
1841 adev->gfx.config.max_backends_per_se = 4;
1842 adev->gfx.config.max_texture_channel_caches = 16;
1843 adev->gfx.config.max_gprs = 256;
1844 adev->gfx.config.max_gs_threads = 32;
1845 adev->gfx.config.max_hw_contexts = 8;
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1853 case CHIP_POLARIS11:
1854 case CHIP_POLARIS12:
/*
 * Polaris topology (SE/SH/CU counts) comes from the vbios; only the
 * fixed limits are set here.  NOTE(review): the error check on 'ret'
 * is not visible in this excerpt.
 */
1855 ret = amdgpu_atombios_get_gfx_info(adev);
1858 adev->gfx.config.max_gprs = 256;
1859 adev->gfx.config.max_gs_threads = 32;
1860 adev->gfx.config.max_hw_contexts = 8;
1862 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1868 case CHIP_POLARIS10:
/* Same vbios-driven topology as Polaris11/12, but TONGA golden config. */
1869 ret = amdgpu_atombios_get_gfx_info(adev);
1872 adev->gfx.config.max_gprs = 256;
1873 adev->gfx.config.max_gs_threads = 32;
1874 adev->gfx.config.max_hw_contexts = 8;
1876 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* 4 SEs, 8 pipes, 8 CUs/SH (TONGA golden) — presumably Tonga itself. */
1883 adev->gfx.config.max_shader_engines = 4;
1884 adev->gfx.config.max_tile_pipes = 8;
1885 adev->gfx.config.max_cu_per_sh = 8;
1886 adev->gfx.config.max_sh_per_se = 1;
1887 adev->gfx.config.max_backends_per_se = 2;
1888 adev->gfx.config.max_texture_channel_caches = 8;
1889 adev->gfx.config.max_gprs = 256;
1890 adev->gfx.config.max_gs_threads = 32;
1891 adev->gfx.config.max_hw_contexts = 8;
1893 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1894 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1895 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1896 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1897 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* APU with revision-dependent CU count (CARRIZO golden) — presumably Carrizo. */
1900 adev->gfx.config.max_shader_engines = 1;
1901 adev->gfx.config.max_tile_pipes = 2;
1902 adev->gfx.config.max_sh_per_se = 1;
1903 adev->gfx.config.max_backends_per_se = 2;
/* CU count depends on how many CUs the SKU has fused off (by PCI revision). */
1905 switch (adev->pdev->revision) {
1913 adev->gfx.config.max_cu_per_sh = 8;
1923 adev->gfx.config.max_cu_per_sh = 6;
1930 adev->gfx.config.max_cu_per_sh = 6;
1939 adev->gfx.config.max_cu_per_sh = 4;
1943 adev->gfx.config.max_texture_channel_caches = 2;
1944 adev->gfx.config.max_gprs = 256;
1945 adev->gfx.config.max_gs_threads = 32;
1946 adev->gfx.config.max_hw_contexts = 8;
1948 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1949 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1950 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1951 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1952 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Smaller APU (1 backend, fewer GS threads) — presumably Stoney. */
1955 adev->gfx.config.max_shader_engines = 1;
1956 adev->gfx.config.max_tile_pipes = 2;
1957 adev->gfx.config.max_sh_per_se = 1;
1958 adev->gfx.config.max_backends_per_se = 1;
1960 switch (adev->pdev->revision) {
1967 adev->gfx.config.max_cu_per_sh = 3;
1973 adev->gfx.config.max_cu_per_sh = 2;
1977 adev->gfx.config.max_texture_channel_caches = 2;
1978 adev->gfx.config.max_gprs = 256;
1979 adev->gfx.config.max_gs_threads = 16;
1980 adev->gfx.config.max_hw_contexts = 8;
1982 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1983 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1984 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1985 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1986 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Fallback configuration — presumably the default: label for unknown VI parts. */
1989 adev->gfx.config.max_shader_engines = 2;
1990 adev->gfx.config.max_tile_pipes = 4;
1991 adev->gfx.config.max_cu_per_sh = 2;
1992 adev->gfx.config.max_sh_per_se = 1;
1993 adev->gfx.config.max_backends_per_se = 2;
1994 adev->gfx.config.max_texture_channel_caches = 4;
1995 adev->gfx.config.max_gprs = 256;
1996 adev->gfx.config.max_gs_threads = 32;
1997 adev->gfx.config.max_hw_contexts = 8;
1999 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2000 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2001 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2002 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2003 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Cache the MC arbiter RAM config; used below for the row-size computation. */
2007 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2008 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2009 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2011 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2012 adev->gfx.config.mem_max_burst_length_bytes = 256;
2013 if (adev->flags & AMD_IS_APU) {
2014 /* Get memory bank mapping mode. */
2015 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2016 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2017 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2019 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2020 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2021 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2023 /* Validate settings in case only one DIMM installed. */
2024 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2025 dimm00_addr_map = 0;
2026 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2027 dimm01_addr_map = 0;
2028 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2029 dimm10_addr_map = 0;
2030 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2031 dimm11_addr_map = 0;
2033 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2034 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2035 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2036 adev->gfx.config.mem_row_size_in_kb = 2;
2038 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU: row size = 4 bytes/column * 2^(8 + NOOFCOLS) columns, capped at 4KB. */
2040 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2041 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2042 if (adev->gfx.config.mem_row_size_in_kb > 4)
2043 adev->gfx.config.mem_row_size_in_kb = 4;
2046 adev->gfx.config.shader_engine_tile_size = 32;
2047 adev->gfx.config.num_gpus = 1;
2048 adev->gfx.config.multi_gpu_tile_size = 64;
2050 /* fix up row size */
/*
 * Encode the computed row size (1/2/4 KB) into the ROW_SIZE field of the
 * golden GB_ADDR_CONFIG.  Case labels omitted in this excerpt.
 */
2051 switch (adev->gfx.config.mem_row_size_in_kb) {
2054 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2057 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2060 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2063 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_sw_init() - software-side initialization for the GFX8 IP block.
 *
 * Registers the interrupt sources the block uses (KIQ, EOP, privileged
 * register and privileged instruction faults), loads the gfx microcode,
 * allocates the RLC and MEC buffer objects, creates the gfx and compute
 * rings, sets up KIQ + per-queue MQDs under SR-IOV, reserves the GDS/GWS/OA
 * partitions, and finally runs gfx_v8_0_gpu_early_init().
 *
 * NOTE(review): the 'if (r) return r;' error checks between the calls below
 * are not visible in this excerpt — confirm against the full file.
 */
2068 static int gfx_v8_0_sw_init(void *handle)
2071 struct amdgpu_ring *ring;
2072 struct amdgpu_kiq *kiq;
2073 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* KIQ interrupt (source id 178 on the legacy IH client). */
2076 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
/* End-of-pipe interrupt (source id 181) shared by gfx and compute rings. */
2081 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2085 /* Privileged reg */
2086 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2087 &adev->gfx.priv_reg_irq);
2091 /* Privileged inst */
2092 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2093 &adev->gfx.priv_inst_irq);
2097 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2099 gfx_v8_0_scratch_init(adev);
2101 r = gfx_v8_0_init_microcode(adev);
2103 DRM_ERROR("Failed to load gfx firmware!\n");
2107 r = gfx_v8_0_rlc_init(adev);
2109 DRM_ERROR("Failed to init rlc BOs!\n");
2113 r = gfx_v8_0_mec_init(adev);
2115 DRM_ERROR("Failed to init MEC BOs!\n");
2119 /* set up the gfx ring */
2120 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2121 ring = &adev->gfx.gfx_ring[i];
2122 ring->ring_obj = NULL;
2123 sprintf(ring->name, "gfx");
2124 /* no gfx doorbells on iceland */
2125 if (adev->asic_type != CHIP_TOPAZ) {
2126 ring->use_doorbell = true;
2127 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2130 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2131 AMDGPU_CP_IRQ_GFX_EOP);
2136 /* set up the compute queues */
2137 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2140 /* max 32 queues per MEC */
2141 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2142 DRM_ERROR("Too many (%d) compute rings!\n", i);
2145 ring = &adev->gfx.compute_ring[i];
2146 ring->ring_obj = NULL;
2147 ring->use_doorbell = true;
2148 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2149 ring->me = 1; /* first MEC */
/* NOTE(review): the ring->pipe assignment is not visible in this excerpt. */
2151 ring->queue = i % 8;
2152 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
/* Each pipe has its own EOP interrupt line. */
2153 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2154 /* type-2 packets are deprecated on MEC, use type-3 instead */
2155 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
/* Under SR-IOV the kernel drives the queues via KIQ, so create it here. */
2161 if (amdgpu_sriov_vf(adev)) {
2162 r = gfx_v8_0_kiq_init(adev);
2164 DRM_ERROR("Failed to init KIQ BOs!\n");
2168 kiq = &adev->gfx.kiq;
2169 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2173 /* create MQD for all compute queues as wel as KIQ for SRIOV case */
2174 r = gfx_v8_0_compute_mqd_sw_init(adev);
2179 /* reserve GDS, GWS and OA resource for gfx */
2180 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2181 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2182 &adev->gds.gds_gfx_bo, NULL, NULL);
2186 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2187 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2188 &adev->gds.gws_gfx_bo, NULL, NULL);
2192 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2193 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2194 &adev->gds.oa_gfx_bo, NULL, NULL);
/* Constant-engine RAM size: 32KB on GFX8. */
2198 adev->gfx.ce_ram_size = 0x8000;
2200 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini() - tear down everything gfx_v8_0_sw_init() created,
 * in reverse order: GDS/GWS/OA buffer objects, gfx and compute rings,
 * the SR-IOV KIQ/MQD state, then the MEC and RLC BOs and the microcode.
 */
2207 static int gfx_v8_0_sw_fini(void *handle)
2210 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Free the reserved GDS, GWS and OA partitions. */
2212 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2213 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2214 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2216 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2217 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2218 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2219 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
/* KIQ and per-queue MQDs only exist under SR-IOV (see sw_init). */
2221 if (amdgpu_sriov_vf(adev)) {
2222 gfx_v8_0_compute_mqd_sw_fini(adev);
2223 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2224 gfx_v8_0_kiq_fini(adev);
2227 gfx_v8_0_mec_fini(adev);
2228 gfx_v8_0_rlc_fini(adev);
2229 gfx_v8_0_free_microcode(adev);
2234 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2236 uint32_t *modearray, *mod2array;
2237 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2238 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2241 modearray = adev->gfx.config.tile_mode_array;
2242 mod2array = adev->gfx.config.macrotile_mode_array;
2244 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2245 modearray[reg_offset] = 0;
2247 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248 mod2array[reg_offset] = 0;
2250 switch (adev->asic_type) {
2252 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2281 PIPE_CONFIG(ADDR_SURF_P2));
2282 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2294 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2346 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P2) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351 PIPE_CONFIG(ADDR_SURF_P2) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2355 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378 NUM_BANKS(ADDR_SURF_8_BANK));
2379 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2402 NUM_BANKS(ADDR_SURF_16_BANK));
2403 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2406 NUM_BANKS(ADDR_SURF_16_BANK));
2407 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2410 NUM_BANKS(ADDR_SURF_8_BANK));
2412 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2413 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2415 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2417 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2418 if (reg_offset != 7)
2419 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2423 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2439 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2443 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2447 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2448 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2456 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2457 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2482 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2485 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2486 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2489 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2490 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2493 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2494 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2498 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2500 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2502 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2506 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2510 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2513 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2514 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2517 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2518 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2521 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2522 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2525 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2526 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2528 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2529 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2536 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2549 NUM_BANKS(ADDR_SURF_8_BANK));
2550 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553 NUM_BANKS(ADDR_SURF_8_BANK));
2554 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 NUM_BANKS(ADDR_SURF_8_BANK));
2558 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561 NUM_BANKS(ADDR_SURF_8_BANK));
2562 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565 NUM_BANKS(ADDR_SURF_8_BANK));
2566 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569 NUM_BANKS(ADDR_SURF_8_BANK));
2570 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2577 NUM_BANKS(ADDR_SURF_8_BANK));
2578 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2581 NUM_BANKS(ADDR_SURF_8_BANK));
2582 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2585 NUM_BANKS(ADDR_SURF_8_BANK));
2586 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2589 NUM_BANKS(ADDR_SURF_8_BANK));
2590 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2593 NUM_BANKS(ADDR_SURF_8_BANK));
2594 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2597 NUM_BANKS(ADDR_SURF_8_BANK));
2598 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2601 NUM_BANKS(ADDR_SURF_4_BANK));
2603 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2604 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2606 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2607 if (reg_offset != 7)
2608 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2612 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2628 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2632 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2663 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2683 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2687 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2691 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2695 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2699 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2702 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2711 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2715 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2738 NUM_BANKS(ADDR_SURF_16_BANK));
2739 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 NUM_BANKS(ADDR_SURF_16_BANK));
2763 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2767 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770 NUM_BANKS(ADDR_SURF_16_BANK));
2771 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774 NUM_BANKS(ADDR_SURF_16_BANK));
2775 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 NUM_BANKS(ADDR_SURF_16_BANK));
2779 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 NUM_BANKS(ADDR_SURF_8_BANK));
2783 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786 NUM_BANKS(ADDR_SURF_4_BANK));
2787 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2790 NUM_BANKS(ADDR_SURF_4_BANK));
2792 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2793 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2795 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2796 if (reg_offset != 7)
2797 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2800 case CHIP_POLARIS11:
2801 case CHIP_POLARIS12:
2802 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2818 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2822 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2826 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2830 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2835 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2836 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2852 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2853 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2861 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2865 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2868 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2872 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2873 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2885 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2888 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2892 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2893 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2896 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2900 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2901 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2905 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2920 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2925 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928 NUM_BANKS(ADDR_SURF_16_BANK));
2930 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 NUM_BANKS(ADDR_SURF_16_BANK));
2935 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2938 NUM_BANKS(ADDR_SURF_16_BANK));
2940 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943 NUM_BANKS(ADDR_SURF_16_BANK));
2945 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2948 NUM_BANKS(ADDR_SURF_16_BANK));
2950 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2955 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2960 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2965 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2968 NUM_BANKS(ADDR_SURF_16_BANK));
2970 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2973 NUM_BANKS(ADDR_SURF_16_BANK));
2975 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 NUM_BANKS(ADDR_SURF_16_BANK));
2980 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983 NUM_BANKS(ADDR_SURF_16_BANK));
2985 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2988 NUM_BANKS(ADDR_SURF_8_BANK));
2990 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2993 NUM_BANKS(ADDR_SURF_4_BANK));
2995 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2996 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2998 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2999 if (reg_offset != 7)
3000 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3003 case CHIP_POLARIS10:
3004 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3007 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3008 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3012 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3016 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3018 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3020 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3024 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3025 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3026 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3028 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3030 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3032 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3033 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3034 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3036 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3037 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3038 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3040 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3041 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3042 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3047 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3048 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3049 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3050 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3051 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3054 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3055 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3058 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3059 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3061 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3062 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3063 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3066 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3067 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3070 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3071 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3074 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3075 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3078 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3079 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3082 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3083 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3087 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3091 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3094 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3095 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3099 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3103 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3107 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3111 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3122 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3127 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3129 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3130 NUM_BANKS(ADDR_SURF_16_BANK));
3132 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135 NUM_BANKS(ADDR_SURF_16_BANK));
3137 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3140 NUM_BANKS(ADDR_SURF_16_BANK));
3142 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145 NUM_BANKS(ADDR_SURF_16_BANK));
3147 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 NUM_BANKS(ADDR_SURF_16_BANK));
3152 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3155 NUM_BANKS(ADDR_SURF_16_BANK));
3157 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3160 NUM_BANKS(ADDR_SURF_16_BANK));
3162 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3165 NUM_BANKS(ADDR_SURF_16_BANK));
3167 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 NUM_BANKS(ADDR_SURF_16_BANK));
3172 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3175 NUM_BANKS(ADDR_SURF_16_BANK));
3177 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3180 NUM_BANKS(ADDR_SURF_16_BANK));
3182 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3185 NUM_BANKS(ADDR_SURF_8_BANK));
3187 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3190 NUM_BANKS(ADDR_SURF_4_BANK));
3192 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3195 NUM_BANKS(ADDR_SURF_4_BANK));
3197 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3198 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3200 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3201 if (reg_offset != 7)
3202 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3206 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207 PIPE_CONFIG(ADDR_SURF_P2) |
3208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3210 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P2) |
3212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3214 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P2) |
3216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3218 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P2) |
3220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3222 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P2) |
3224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3226 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 PIPE_CONFIG(ADDR_SURF_P2) |
3228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3230 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3234 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3235 PIPE_CONFIG(ADDR_SURF_P2));
3236 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3240 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3244 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3248 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3264 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3265 PIPE_CONFIG(ADDR_SURF_P2) |
3266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3268 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3269 PIPE_CONFIG(ADDR_SURF_P2) |
3270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3273 PIPE_CONFIG(ADDR_SURF_P2) |
3274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3280 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3284 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3288 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3292 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3309 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 NUM_BANKS(ADDR_SURF_8_BANK));
3313 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316 NUM_BANKS(ADDR_SURF_8_BANK));
3317 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3320 NUM_BANKS(ADDR_SURF_8_BANK));
3321 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3324 NUM_BANKS(ADDR_SURF_8_BANK));
3325 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3328 NUM_BANKS(ADDR_SURF_8_BANK));
3329 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3332 NUM_BANKS(ADDR_SURF_8_BANK));
3333 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3336 NUM_BANKS(ADDR_SURF_8_BANK));
3337 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3340 NUM_BANKS(ADDR_SURF_16_BANK));
3341 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3344 NUM_BANKS(ADDR_SURF_16_BANK));
3345 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 NUM_BANKS(ADDR_SURF_16_BANK));
3349 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352 NUM_BANKS(ADDR_SURF_16_BANK));
3353 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356 NUM_BANKS(ADDR_SURF_16_BANK));
3357 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360 NUM_BANKS(ADDR_SURF_16_BANK));
3361 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364 NUM_BANKS(ADDR_SURF_8_BANK));
3366 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3367 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3369 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3371 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3372 if (reg_offset != 7)
3373 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3378 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3382 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3383 PIPE_CONFIG(ADDR_SURF_P2) |
3384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3386 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3387 PIPE_CONFIG(ADDR_SURF_P2) |
3388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3390 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3391 PIPE_CONFIG(ADDR_SURF_P2) |
3392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3394 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3395 PIPE_CONFIG(ADDR_SURF_P2) |
3396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3398 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3399 PIPE_CONFIG(ADDR_SURF_P2) |
3400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3402 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3403 PIPE_CONFIG(ADDR_SURF_P2) |
3404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3406 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3407 PIPE_CONFIG(ADDR_SURF_P2) |
3408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3410 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3411 PIPE_CONFIG(ADDR_SURF_P2));
3412 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3413 PIPE_CONFIG(ADDR_SURF_P2) |
3414 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3416 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3417 PIPE_CONFIG(ADDR_SURF_P2) |
3418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3420 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3421 PIPE_CONFIG(ADDR_SURF_P2) |
3422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3424 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3425 PIPE_CONFIG(ADDR_SURF_P2) |
3426 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3428 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3429 PIPE_CONFIG(ADDR_SURF_P2) |
3430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3432 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3433 PIPE_CONFIG(ADDR_SURF_P2) |
3434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3436 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3437 PIPE_CONFIG(ADDR_SURF_P2) |
3438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3440 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3441 PIPE_CONFIG(ADDR_SURF_P2) |
3442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3444 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3445 PIPE_CONFIG(ADDR_SURF_P2) |
3446 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3449 PIPE_CONFIG(ADDR_SURF_P2) |
3450 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3452 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3453 PIPE_CONFIG(ADDR_SURF_P2) |
3454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3456 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3457 PIPE_CONFIG(ADDR_SURF_P2) |
3458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3460 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3461 PIPE_CONFIG(ADDR_SURF_P2) |
3462 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3464 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3465 PIPE_CONFIG(ADDR_SURF_P2) |
3466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3468 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3469 PIPE_CONFIG(ADDR_SURF_P2) |
3470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3472 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3473 PIPE_CONFIG(ADDR_SURF_P2) |
3474 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3476 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3477 PIPE_CONFIG(ADDR_SURF_P2) |
3478 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3480 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3481 PIPE_CONFIG(ADDR_SURF_P2) |
3482 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3485 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3488 NUM_BANKS(ADDR_SURF_8_BANK));
3489 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3492 NUM_BANKS(ADDR_SURF_8_BANK));
3493 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3496 NUM_BANKS(ADDR_SURF_8_BANK));
3497 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3500 NUM_BANKS(ADDR_SURF_8_BANK));
3501 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3504 NUM_BANKS(ADDR_SURF_8_BANK));
3505 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3508 NUM_BANKS(ADDR_SURF_8_BANK));
3509 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3512 NUM_BANKS(ADDR_SURF_8_BANK));
3513 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3516 NUM_BANKS(ADDR_SURF_16_BANK));
3517 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3520 NUM_BANKS(ADDR_SURF_16_BANK));
3521 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3524 NUM_BANKS(ADDR_SURF_16_BANK));
3525 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3528 NUM_BANKS(ADDR_SURF_16_BANK));
3529 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3532 NUM_BANKS(ADDR_SURF_16_BANK));
3533 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3536 NUM_BANKS(ADDR_SURF_16_BANK));
3537 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3540 NUM_BANKS(ADDR_SURF_8_BANK));
3542 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3543 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3545 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3547 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3548 if (reg_offset != 7)
3549 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Program GRBM_GFX_INDEX so that subsequent banked register accesses are
 * steered to a specific shader engine (se_num), shader array (sh_num) and
 * instance, or broadcast to all of them when 0xffffffff is passed for a
 * field.  NOTE(review): callers visible in this file take grbm_idx_mutex
 * around this — presumably required; confirm for any new caller.
 */
3555 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3556 u32 se_num, u32 sh_num, u32 instance)
/* 0xffffffff selects broadcast mode for the field; otherwise index it. */
3560 if (instance == 0xffffffff)
3561 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3563 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3565 if (se_num == 0xffffffff)
3566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3568 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3570 if (sh_num == 0xffffffff)
3571 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3575 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Return a mask with the low @bit_width bits set.  The 64-bit intermediate
 * (1ULL) keeps the shift well-defined even when bit_width == 32.
 */
3578 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3580 return (u32)((1ULL << bit_width) - 1);
/*
 * Return a bitmap of the render backends that are active for the currently
 * selected SE/SH (see gfx_v8_0_select_se_sh): the hardware- and user-disable
 * registers are OR-ed, the BACKEND_DISABLE field extracted, inverted, and
 * masked to the per-SH backend count.
 */
3583 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3587 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3588 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3590 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
/* mask width = backends per SE divided across the shader arrays */
3592 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3593 adev->gfx.config.max_sh_per_se);
3595 return (~data) & mask;
/*
 * Select the golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values for
 * the current ASIC.  The values are OR-ed into *rconf / *rconf1, so callers
 * are expected to pass zero-initialized words (see gfx_v8_0_setup_rb).
 * NOTE(review): several case labels are not visible in this excerpt; the
 * per-chip groupings below are partial.
 */
3599 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3601 switch (adev->asic_type) {
3603 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3604 RB_XSEL2(1) | PKR_MAP(2) |
3605 PKR_XSEL(1) | PKR_YSEL(1) |
3606 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3607 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3611 case CHIP_POLARIS10:
3612 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3613 SE_XSEL(1) | SE_YSEL(1);
3614 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3619 *rconf |= RB_MAP_PKR0(2);
3622 case CHIP_POLARIS11:
3623 case CHIP_POLARIS12:
3624 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3625 SE_XSEL(1) | SE_YSEL(1);
/* unrecognized ASIC: nothing is OR-ed in, only the error is logged */
3633 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Write per-SE raster configs for parts with harvested (fused-off) render
 * backends.  For every shader engine the SE/PKR/RB mapping fields of the
 * raster config are remapped so they only reference backends present in
 * @rb_mask, then the adjusted value is written through GRBM_GFX_INDEX
 * steering.  @raster_config/@raster_config_1 are the unharvested golden
 * values; @num_rb is the total RB count across all SEs.
 */
3639 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3640 u32 raster_config, u32 raster_config_1,
3641 unsigned rb_mask, unsigned num_rb)
3643 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3644 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3645 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3646 unsigned rb_per_se = num_rb / num_se;
3647 unsigned se_mask[4];
/* carve rb_mask into one contiguous slice of rb_per_se bits per SE */
3650 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3651 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3652 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3653 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* only these topologies are expected on VI parts */
3655 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3656 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3657 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* if a whole SE pair is harvested, repoint SE_PAIR_MAP at the live pair */
3659 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3660 (!se_mask[2] && !se_mask[3]))) {
3661 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3663 if (!se_mask[0] && !se_mask[1]) {
3665 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3668 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3672 for (se = 0; se < num_se; se++) {
3673 unsigned raster_config_se = raster_config;
3674 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3675 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3676 int idx = (se / 2) * 2;
/* SE_MAP: steer work away from a fully-harvested SE in this pair */
3678 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3679 raster_config_se &= ~SE_MAP_MASK;
3681 if (!se_mask[idx]) {
3682 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3684 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* PKR_MAP: same idea at packer granularity within the SE */
3688 pkr0_mask &= rb_mask;
3689 pkr1_mask &= rb_mask;
3690 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3691 raster_config_se &= ~PKR_MAP_MASK;
3694 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3696 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* RB_MAP_PKR0/PKR1: finally remap at individual-RB granularity */
3700 if (rb_per_se >= 2) {
3701 unsigned rb0_mask = 1 << (se * rb_per_se);
3702 unsigned rb1_mask = rb0_mask << 1;
3704 rb0_mask &= rb_mask;
3705 rb1_mask &= rb_mask;
3706 if (!rb0_mask || !rb1_mask) {
3707 raster_config_se &= ~RB_MAP_PKR0_MASK;
3711 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3714 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3718 if (rb_per_se > 2) {
3719 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3720 rb1_mask = rb0_mask << 1;
3721 rb0_mask &= rb_mask;
3722 rb1_mask &= rb_mask;
3723 if (!rb0_mask || !rb1_mask) {
3724 raster_config_se &= ~RB_MAP_PKR1_MASK;
3728 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3731 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3737 /* GRBM_GFX_INDEX has a different offset on VI */
3738 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3739 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3740 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3743 /* GRBM_GFX_INDEX has a different offset on VI */
3744 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active, program the raster config
 * accordingly (broadcast when nothing is harvested, per-SE otherwise),
 * and cache the per-SE/SH RB register values for userspace queries.
 * Serialized against other GRBM_GFX_INDEX users via grbm_idx_mutex.
 */
3747 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3751 u32 raster_config = 0, raster_config_1 = 0;
3753 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3754 adev->gfx.config.max_sh_per_se;
3755 unsigned num_rb_pipes;
/* walk every SE/SH and assemble the global active-RB bitmap */
3757 mutex_lock(&adev->grbm_idx_mutex);
3758 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3759 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3760 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3761 data = gfx_v8_0_get_rb_active_bitmap(adev);
3762 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3763 rb_bitmap_width_per_sh);
3766 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3768 adev->gfx.config.backend_enable_mask = active_rbs;
3769 adev->gfx.config.num_rbs = hweight32(active_rbs);
3771 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3772 adev->gfx.config.max_shader_engines, 16);
3774 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* all RBs present (or none reported): broadcast the golden values;
 * otherwise fall back to the harvested per-SE programming */
3776 if (!adev->gfx.config.backend_enable_mask ||
3777 adev->gfx.config.num_rbs >= num_rb_pipes) {
3778 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3779 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3781 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3782 adev->gfx.config.backend_enable_mask,
3786 /* cache the values for userspace */
3787 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3788 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3789 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3790 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3791 RREG32(mmCC_RB_BACKEND_DISABLE);
3792 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3793 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3794 adev->gfx.config.rb_config[i][j].raster_config =
3795 RREG32(mmPA_SC_RASTER_CONFIG);
3796 adev->gfx.config.rb_config[i][j].raster_config_1 =
3797 RREG32(mmPA_SC_RASTER_CONFIG_1);
3800 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3801 mutex_unlock(&adev->grbm_idx_mutex);
3805 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3807 * @adev: amdgpu_device pointer
3809 * Initialize compute vmid sh_mem registers
3812 #define DEFAULT_SH_MEM_BASES (0x6000)
3813 #define FIRST_COMPUTE_VMID (8)
3814 #define LAST_COMPUTE_VMID (16)
/* Program SH_MEM_* for the compute VMIDs (FIRST_COMPUTE_VMID..LAST-1),
 * giving each the same HSA64 aperture layout described above. */
3815 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3818 uint32_t sh_mem_config;
3819 uint32_t sh_mem_bases;
3822 * Configure apertures:
3823 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3824 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3825 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
/* same base for shared and private apertures, packed into one word */
3827 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3829 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3830 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3831 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3832 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3833 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3834 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3836 mutex_lock(&adev->srbm_mutex);
3837 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3838 vi_srbm_select(adev, 0, 0, 0, i);
3839 /* CP and shaders */
3840 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* base=1, limit=0 — presumably marks APE1 unused (base > limit); confirm */
3841 WREG32(mmSH_MEM_APE1_BASE, 1);
3842 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3843 WREG32(mmSH_MEM_BASES, sh_mem_bases);
/* restore VMID 0 selection before dropping the lock */
3845 vi_srbm_select(adev, 0, 0, 0, 0);
3846 mutex_unlock(&adev->srbm_mutex);
/*
 * Per-ASIC gfx config defaults.  Only double_offchip_lds_buf is set here;
 * the case labels selecting each branch are not visible in this excerpt.
 */
3849 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3851 switch (adev->asic_type) {
3853 adev->gfx.config.double_offchip_lds_buf = 1;
3857 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time GFX8 golden init: address config, tiling tables, RB setup,
 * per-VMID SH_MEM programming, compute VMIDs, and broadcast PA_SC/SPI
 * defaults.  Called during hw init before the CP/RLC are brought up.
 */
3862 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3864 u32 tmp, sh_static_mem_cfg;
3867 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3868 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3869 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3870 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3872 gfx_v8_0_tiling_mode_table_init(adev);
3873 gfx_v8_0_setup_rb(adev);
3874 gfx_v8_0_get_cu_info(adev);
3875 gfx_v8_0_config_init(adev);
3877 /* XXX SH_MEM regs */
3878 /* where to put LDS, scratch, GPUVM in FSA64 space */
3879 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3881 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3883 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
/* program SH_MEM_* for every VMID; the branch condition selecting the
 * MTYPE_UC vs. MTYPE_NC variants is elided in this excerpt */
3885 mutex_lock(&adev->srbm_mutex);
3886 for (i = 0; i < adev->vm_manager.num_ids; i++) {
3887 vi_srbm_select(adev, 0, 0, 0, i);
3888 /* CP and shaders */
3890 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3891 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3892 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3893 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3894 WREG32(mmSH_MEM_CONFIG, tmp);
3895 WREG32(mmSH_MEM_BASES, 0);
3897 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3898 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3899 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3900 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3901 WREG32(mmSH_MEM_CONFIG, tmp);
3902 tmp = adev->mc.shared_aperture_start >> 48;
3903 WREG32(mmSH_MEM_BASES, tmp);
3906 WREG32(mmSH_MEM_APE1_BASE, 1);
3907 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3908 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3910 vi_srbm_select(adev, 0, 0, 0, 0);
3911 mutex_unlock(&adev->srbm_mutex);
3913 gfx_v8_0_init_compute_vmid(adev);
3915 mutex_lock(&adev->grbm_idx_mutex);
3917 * making sure that the following register writes will be broadcasted
3918 * to all the shaders
3920 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3922 WREG32(mmPA_SC_FIFO_SIZE,
3923 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3924 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3925 (adev->gfx.config.sc_prim_fifo_size_backend <<
3926 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3927 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3928 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3929 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3930 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* equalize SPI arbitration priority across the four pipe order slots */
3932 tmp = RREG32(mmSPI_ARB_PRIORITY);
3933 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3934 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3935 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3936 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3937 WREG32(mmSPI_ARB_PRIORITY, tmp);
3939 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the CU masters on
 * every SE/SH (bounded by adev->usec_timeout iterations each), then the
 * non-CU masters (SE/GC/TC0/TC1) globally.  Times out silently.
 */
3943 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3948 mutex_lock(&adev->grbm_idx_mutex);
3949 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3950 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3951 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3952 for (k = 0; k < adev->usec_timeout; k++) {
3953 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3959 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3960 mutex_unlock(&adev->grbm_idx_mutex);
3962 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3963 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3964 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3965 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3966 for (k = 0; k < adev->usec_timeout; k++) {
3967 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Enable/disable the ring-0 GUI idle/busy interrupt sources
 * (context busy/empty, CMP busy, gfx idle) in one RMW of CP_INT_CNTL_RING0. */
3973 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3976 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3978 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3979 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3980 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3981 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3983 WREG32(mmCP_INT_CNTL_RING0, tmp);
/* Point the RLC at the clear-state indirect buffer (CSIB): 64-bit GPU
 * address split hi/lo (low bits masked to 4-byte alignment) plus its size. */
3986 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3989 WREG32(mmRLC_CSIB_ADDR_HI,
3990 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3991 WREG32(mmRLC_CSIB_ADDR_LO,
3992 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3993 WREG32(mmRLC_CSIB_LENGTH,
3994 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC register-list-format blob in place: record the starting
 * offset of each indirect section into ind_start_offsets, collect the set
 * of distinct indices into unique_indices, and rewrite each index in the
 * list to its position within unique_indices.  BUG()s if either output
 * array would overflow.  (Parts of the parameter list and loop body are
 * elided in this excerpt.)
 */
3997 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4000 int *unique_indices,
4003 int *ind_start_offsets,
4008 bool new_entry = true;
4010 for (; ind_offset < list_size; ind_offset++) {
4014 ind_start_offsets[*offset_count] = ind_offset;
4015 *offset_count = *offset_count + 1;
4016 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF terminates a section — presumably starts a new entry; confirm */
4019 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4026 /* look for the matching indice */
4028 indices < *indices_count;
4030 if (unique_indices[indices] ==
4031 register_list_format[ind_offset])
4035 if (indices >= *indices_count) {
4036 unique_indices[*indices_count] =
4037 register_list_format[ind_offset];
4038 indices = *indices_count;
4039 *indices_count = *indices_count + 1;
4040 BUG_ON(*indices_count >= max_indices);
4043 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore lists: copy the firmware's register-list
 * format into a scratch buffer, parse it (see gfx_v8_0_parse_ind_reg_list),
 * then program the ARAM restore list, the GPM scratch copies, the indirect
 * section start offsets, and the unique index control/data registers.
 * Returns 0 on success, -ENOMEM path elided in this excerpt.
 */
4047 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4050 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4051 int indices_count = 0;
4052 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4053 int offset_count = 0;
/* work on a private copy: parsing rewrites the list in place */
4056 unsigned int *register_list_format =
4057 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4058 if (!register_list_format)
4060 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4061 adev->gfx.rlc.reg_list_format_size_bytes);
4063 gfx_v8_0_parse_ind_reg_list(register_list_format,
4064 RLC_FormatDirectRegListLength,
4065 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4068 sizeof(unique_indices) / sizeof(int),
4069 indirect_start_offsets,
4071 sizeof(indirect_start_offsets)/sizeof(int));
4073 /* save and restore list */
4074 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4076 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4077 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4078 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4081 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4082 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4083 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* restore-list length is stored in dwords, halved before writing */
4085 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4086 list_size = list_size >> 1;
4087 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4088 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4090 /* starting offsets starts */
4091 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4092 adev->gfx.rlc.starting_offsets_start);
4093 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4094 WREG32(mmRLC_GPM_SCRATCH_DATA,
4095 indirect_start_offsets[i]);
4097 /* unique indices */
4098 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4099 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4100 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4101 if (unique_indices[i] != 0) {
/* NOTE(review): low 18 bits -> ADDR reg, bits >=20 -> DATA reg; bits
 * 18-19 are dropped — looks intentional per register layout, confirm */
4102 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4103 WREG32(data + i, unique_indices[i] >> 20);
4106 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM_ENABLE). */
4111 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4113 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program RLC power-gating timing: WPTR poll idle count, the four
 * RLC_PG_DELAY sub-delays (all 0x10), serdes command delay, and the
 * GRBM register-save idle threshold for auto power gating.
 */
4116 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4120 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4122 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4123 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4124 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4125 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4126 WREG32(mmRLC_PG_DELAY, data);
4128 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4129 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Enable/disable SMU clock slowdown while powering up (CZ/ST family). */
4133 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4136 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Enable/disable SMU clock slowdown while powering down (CZ/ST family). */
4139 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4142 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the field is a DISABLE bit, so the
 * polarity is inverted (enable -> write 0). */
4145 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4147 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init per ASIC family.  APUs (Carrizo/Stoney) additionally
 * program the RLC jump table and always-on CU mask; Polaris11/12 use the
 * common CSB + save/restore + PG-delay path only.
 */
4150 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4152 if ((adev->asic_type == CHIP_CARRIZO) ||
4153 (adev->asic_type == CHIP_STONEY)) {
4154 gfx_v8_0_init_csb(adev);
4155 gfx_v8_0_init_save_restore_list(adev);
4156 gfx_v8_0_enable_save_restore_machine(adev);
4157 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4158 gfx_v8_0_init_power_gating(adev);
4159 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4160 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4161 (adev->asic_type == CHIP_POLARIS12)) {
4162 gfx_v8_0_init_csb(adev);
4163 gfx_v8_0_init_save_restore_list(adev);
4164 gfx_v8_0_enable_save_restore_machine(adev);
4165 gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask the GUI idle interrupts, then wait for the
 * serdes masters to drain. */
4170 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4172 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4174 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4175 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft reset bit (assert, then deassert; any delay between
 * the two writes is elided in this excerpt). */
4178 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4180 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4183 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/* Start the RLC F32 core; on dGPUs also re-enable the GUI idle interrupt
 * (APUs defer that until after CP init, per the comment below). */
4187 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4189 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4191 /* carrizo do enable cp interrupt after cp inited */
4192 if (!(adev->flags & AMD_IS_APU))
4193 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode load: stream the ucode words from the
 * request_firmware blob into RLC_GPM_UCODE_DATA, then write the fw version
 * into the ADDR register.  Returns an error when no RLC firmware is loaded
 * (error value on the elided return line).
 */
4198 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4200 const struct rlc_firmware_header_v2_0 *hdr;
4201 const __le32 *fw_data;
4202 unsigned i, fw_size;
4204 if (!adev->gfx.rlc_fw)
4207 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4208 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4210 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4211 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4212 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
/* reset write pointer, stream the words, then record the fw version */
4214 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4215 for (i = 0; i < fw_size; i++)
4216 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4217 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Bring the RLC up: stop it, disable CGCG/CGLS clock gating, clear the PG
 * control, reset, run PG init, load microcode (legacy path) or wait for
 * the SMU to finish loading it, then start the RLC.  Error handling of the
 * load paths is partly elided in this excerpt.
 */
4222 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4227 gfx_v8_0_rlc_stop(adev);
/* disable coarse-grain clock gating while the RLC is (re)initialized */
4230 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4231 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4232 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4233 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4234 if (adev->asic_type == CHIP_POLARIS11 ||
4235 adev->asic_type == CHIP_POLARIS10 ||
4236 adev->asic_type == CHIP_POLARIS12) {
4237 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4239 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4243 WREG32(mmRLC_PG_CNTL, 0);
4245 gfx_v8_0_rlc_reset(adev);
4246 gfx_v8_0_init_pg(adev);
4248 if (!adev->pp_enabled) {
4249 if (!adev->firmware.smu_load) {
4250 /* legacy rlc firmware loading */
4251 r = gfx_v8_0_rlc_load_microcode(adev);
4255 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4256 AMDGPU_UCODE_ID_RLC_G);
4262 gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three gfx CP micro-engines (ME/PFP/CE) via
 * CP_ME_CNTL.  When halting, also mark every gfx ring not-ready so no
 * further submissions are attempted.
 */
4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4270 u32 tmp = RREG32(mmCP_ME_CNTL);
4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281 adev->gfx.gfx_ring[i].ready = false;
4283 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Legacy (non-SMU) gfx CP microcode load: halt the CP, then stream the
 * PFP, CE and ME ucode images from their firmware blobs into the
 * respective UCODE/RAM data ports, finishing each with its fw version.
 * Returns an error when any of the three firmwares is missing (error
 * value on the elided return line).
 */
4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4289 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290 const struct gfx_firmware_header_v1_0 *ce_hdr;
4291 const struct gfx_firmware_header_v1_0 *me_hdr;
4292 const __le32 *fw_data;
4293 unsigned i, fw_size;
4295 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4298 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299 adev->gfx.pfp_fw->data;
4300 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301 adev->gfx.ce_fw->data;
4302 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303 adev->gfx.me_fw->data;
4305 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* CP must be halted while its ucode RAMs are rewritten */
4309 gfx_v8_0_cp_gfx_enable(adev, false);
/* PFP (pre-fetch parser) */
4312 fw_data = (const __le32 *)
4313 (adev->gfx.pfp_fw->data +
4314 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317 for (i = 0; i < fw_size; i++)
4318 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE (constant engine) */
4322 fw_data = (const __le32 *)
4323 (adev->gfx.ce_fw->data +
4324 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326 WREG32(mmCP_CE_UCODE_ADDR, 0);
4327 for (i = 0; i < fw_size; i++)
4328 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME (micro engine) */
4332 fw_data = (const __le32 *)
4333 (adev->gfx.me_fw->data +
4334 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336 WREG32(mmCP_ME_RAM_WADDR, 0);
4337 for (i = 0; i < fw_size; i++)
4338 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Compute the dword count of the clear-state buffer built by
 * gfx_v8_0_cp_gfx_start: fixed preamble/context-control/raster-config/
 * end packets (counts on elided lines) plus 2 + reg_count dwords for each
 * SECT_CONTEXT extent in vi_cs_data.  Must stay in sync with that
 * function's packet stream.
 */
4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4347 const struct cs_section_def *sect = NULL;
4348 const struct cs_extent_def *ext = NULL;
4350 /* begin clear state */
4352 /* context control state */
4355 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4356 for (ext = sect->section; ext->extent != NULL; ++ext) {
4357 if (sect->id == SECT_CONTEXT)
4358 count += 2 + ext->reg_count;
4363 /* pa_sc_raster_config/pa_sc_raster_config1 */
4365 /* end clear state */
/*
 * Bring up the gfx ring: program CP limits, un-halt the CP, then emit the
 * initial clear-state packet stream (preamble, context control, the
 * vi_cs_data context registers, per-ASIC PA_SC_RASTER_CONFIG values,
 * CLEAR_STATE, and the CE partition bases) on ring 0.  The stream's size
 * must match gfx_v8_0_get_csb_size().  Returns the ring-alloc error on
 * failure, 0 otherwise.
 */
4373 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4375 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4376 const struct cs_section_def *sect = NULL;
4377 const struct cs_extent_def *ext = NULL;
4381 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4382 WREG32(mmCP_ENDIAN_SWAP, 0);
4383 WREG32(mmCP_DEVICE_ID, 1);
4385 gfx_v8_0_cp_gfx_enable(adev, true);
/* +4 dwords of headroom beyond the computed clear-state size */
4387 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4389 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4393 /* clear state buffer */
4394 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4395 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4397 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4398 amdgpu_ring_write(ring, 0x80000000);
4399 amdgpu_ring_write(ring, 0x80000000);
4401 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4402 for (ext = sect->section; ext->extent != NULL; ++ext) {
4403 if (sect->id == SECT_CONTEXT) {
4404 amdgpu_ring_write(ring,
4405 PACKET3(PACKET3_SET_CONTEXT_REG,
4407 amdgpu_ring_write(ring,
4408 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4409 for (i = 0; i < ext->reg_count; i++)
4410 amdgpu_ring_write(ring, ext->extent[i]);
/* per-ASIC golden raster config pair (case labels partly elided) */
4415 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4416 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4417 switch (adev->asic_type) {
4419 case CHIP_POLARIS10:
4420 amdgpu_ring_write(ring, 0x16000012);
4421 amdgpu_ring_write(ring, 0x0000002A);
4423 case CHIP_POLARIS11:
4424 case CHIP_POLARIS12:
4425 amdgpu_ring_write(ring, 0x16000012);
4426 amdgpu_ring_write(ring, 0x00000000);
4429 amdgpu_ring_write(ring, 0x3a00161a);
4430 amdgpu_ring_write(ring, 0x0000002e);
4433 amdgpu_ring_write(ring, 0x00000002);
4434 amdgpu_ring_write(ring, 0x00000000);
4437 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4438 0x00000000 : 0x00000002);
4439 amdgpu_ring_write(ring, 0x00000000);
4442 amdgpu_ring_write(ring, 0x00000000);
4443 amdgpu_ring_write(ring, 0x00000000);
4449 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4450 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4452 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4453 amdgpu_ring_write(ring, 0);
4455 /* init the CE partitions */
4456 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4457 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4458 amdgpu_ring_write(ring, 0x8000);
4459 amdgpu_ring_write(ring, 0x8000);
4461 amdgpu_ring_commit(ring);
/*
 * gfx_v8_0_cp_gfx_resume - program the GFX ring buffer registers
 * (CP_RB0_*), the rptr/wptr writeback addresses, the ring base, and the
 * gfx doorbell range (except on Topaz, which has none), then start the
 * ring via gfx_v8_0_cp_gfx_start() and run a ring test.  Returns the
 * ring-test result.
 */
4466 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4468 struct amdgpu_ring *ring;
4471 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4474 /* Set the write pointer delay */
4475 WREG32(mmCP_RB_WPTR_DELAY, 0);
4477 /* set the RB to use vmid 0 */
4478 WREG32(mmCP_RB_VMID, 0);
4480 /* Set ring buffer size */
4481 ring = &adev->gfx.gfx_ring[0];
/* ring_size is in bytes; RB_BUFSZ is log2 of the size in 8-byte units */
4482 rb_bufsz = order_base_2(ring->ring_size / 8);
4483 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4484 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4485 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4486 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4490 WREG32(mmCP_RB0_CNTL, tmp);
4492 /* Initialize the ring buffer's read and write pointers */
/* RB_RPTR_WR_ENA lets us force the read pointer while (re)initializing */
4493 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4495 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4497 /* set the wb address whether it's enabled or not */
4498 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4499 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4500 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4502 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4503 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4504 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* drop RB_RPTR_WR_ENA again now that the pointers are programmed */
4506 WREG32(mmCP_RB0_CNTL, tmp);
/* ring base is a 256-byte aligned GPU address, stored >> 8 */
4508 rb_addr = ring->gpu_addr >> 8;
4509 WREG32(mmCP_RB0_BASE, rb_addr);
4510 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4512 /* no gfx doorbells on iceland */
4513 if (adev->asic_type != CHIP_TOPAZ) {
4514 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4515 if (ring->use_doorbell) {
4516 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4517 DOORBELL_OFFSET, ring->doorbell_index);
4518 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4520 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4523 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4526 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4528 if (adev->asic_type == CHIP_TONGA) {
4529 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4530 DOORBELL_RANGE_LOWER,
4531 AMDGPU_DOORBELL_GFX_RING0);
4532 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4534 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4535 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4540 /* start the ring */
4541 amdgpu_ring_clear_ring(ring);
4542 gfx_v8_0_cp_gfx_start(adev);
4544 r = amdgpu_ring_test_ring(ring);
/* ring test failed: mark the gfx ring unusable */
4546 ring->ready = false;
/*
 * gfx_v8_0_cp_compute_enable - enable or halt the compute micro engines.
 * Enabling clears CP_MEC_CNTL; disabling halts both MEC ME1 and ME2 and
 * marks every compute ring as not ready.
 */
4551 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4556 WREG32(mmCP_MEC_CNTL, 0);
4558 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4559 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4560 adev->gfx.compute_ring[i].ready = false;
/*
 * gfx_v8_0_cp_compute_load_microcode - legacy (non-SMU) upload of the MEC
 * firmware into the CP via the MEC ucode address/data registers.  Halts
 * the compute engines first; MEC2 firmware is only uploaded when a
 * separate mec2 image was loaded.  Returns 0, or an error if no MEC
 * firmware is available.
 */
4565 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4567 const struct gfx_firmware_header_v1_0 *mec_hdr;
4568 const __le32 *fw_data;
4569 unsigned i, fw_size;
4571 if (!adev->gfx.mec_fw)
4574 gfx_v8_0_cp_compute_enable(adev, false);
4576 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4577 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4579 fw_data = (const __le32 *)
4580 (adev->gfx.mec_fw->data +
4581 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4582 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
/* MEC1: reset the ucode write address, stream in the image, then store
 * the fw version in the address register (matches the ME/PFP/CE flow). */
4585 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4586 for (i = 0; i < fw_size; i++)
4587 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4588 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4590 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4591 if (adev->gfx.mec2_fw) {
4592 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4594 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4595 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4597 fw_data = (const __le32 *)
4598 (adev->gfx.mec2_fw->data +
4599 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4600 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4602 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4603 for (i = 0; i < fw_size; i++)
4604 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4605 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
/*
 * gfx_v8_0_cp_compute_fini - tear down the per-ring MQD buffer objects:
 * reserve, unpin, unreserve and drop the reference for each compute
 * ring's mqd_obj, then clear the cached pointers/addresses.
 */
4611 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4615 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4616 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4618 if (ring->mqd_obj) {
4619 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4620 if (unlikely(r != 0))
/* reservation failure is only warned about; teardown continues */
4621 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4623 amdgpu_bo_unpin(ring->mqd_obj);
4624 amdgpu_bo_unreserve(ring->mqd_obj);
4626 amdgpu_bo_unref(&ring->mqd_obj);
4627 ring->mqd_obj = NULL;
4628 ring->mqd_ptr = NULL;
4629 ring->mqd_gpu_addr = 0;
/*
 * gfx_v8_0_kiq_setting - tell the RLC which queue is the KIQ by encoding
 * the ring's me/pipe/queue into RLC_CP_SCHEDULERS.  The register is
 * written twice, preserving the original programming sequence.
 */
4635 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4638 struct amdgpu_device *adev = ring->adev;
4640 /* tell RLC which is KIQ queue */
4641 tmp = RREG32(mmRLC_CP_SCHEDULERS);
/* queue id layout: me in bits [7:5], pipe in [4:3], queue in [2:0] */
4643 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4644 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4646 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/*
 * gfx_v8_0_kiq_enable - submit a PACKET3_SET_RESOURCES packet on the KIQ
 * ring granting it queues 0-7 (mask 0xFF) and no GWS/OAC/GDS resources.
 */
4649 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4651 amdgpu_ring_alloc(ring, 8);
4653 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4654 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4655 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
4656 amdgpu_ring_write(ring, 0); /* queue mask hi */
4657 amdgpu_ring_write(ring, 0); /* gws mask lo */
4658 amdgpu_ring_write(ring, 0); /* gws mask hi */
4659 amdgpu_ring_write(ring, 0); /* oac mask */
4660 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
4661 amdgpu_ring_commit(ring);
/*
 * gfx_v8_0_map_queue_enable - submit a PACKET3_MAP_QUEUES packet on the
 * KIQ ring to map @ring's hardware queue, passing its MQD GPU address,
 * wptr writeback address and doorbell/pipe/queue selection.
 */
4665 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4666 struct amdgpu_ring *ring)
4668 struct amdgpu_device *adev = kiq_ring->adev;
4669 uint64_t mqd_addr, wptr_addr;
4671 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4672 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4673 amdgpu_ring_alloc(kiq_ring, 8);
4675 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4676 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4677 amdgpu_ring_write(kiq_ring, 0x21010000);
/* doorbell dword: offset[x:2], queue[28:26], pipe[30:29], ME select bit 31 */
4678 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4679 (ring->queue << 26) |
4680 (ring->pipe << 29) |
4681 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4682 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4683 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4684 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4685 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4686 amdgpu_ring_commit(kiq_ring);
/*
 * gfx_v8_0_mqd_init - fill in a vi_mqd (memory queue descriptor) for
 * @ring: static header/thread-mgmt fields, EOP base/size, doorbell
 * control, MQD and HQD base addresses, PQ control, rptr/wptr writeback
 * addresses, vmid 0 and persistent state — mirroring what
 * gfx_v8_0_kiq_init_register() later writes to the CP_HQD_* registers.
 */
4690 static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
4692 uint64_t mqd_gpu_addr,
4693 uint64_t eop_gpu_addr,
4694 struct amdgpu_ring *ring)
4696 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4699 mqd->header = 0xC0310800;
4700 mqd->compute_pipelinestat_enable = 0x00000001;
/* enable all shader engines for static thread management */
4701 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4702 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4703 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4704 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4705 mqd->compute_misc_reserved = 0x00000003;
/* EOP base is a 256-byte aligned GPU address, stored >> 8 */
4707 eop_base_addr = eop_gpu_addr >> 8;
4708 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4709 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4711 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4712 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4713 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4714 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4716 mqd->cp_hqd_eop_control = tmp;
4718 /* enable doorbell? */
4719 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4721 if (ring->use_doorbell)
4722 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4728 mqd->cp_hqd_pq_doorbell_control = tmp;
4730 /* disable the queue if it's active */
4731 mqd->cp_hqd_dequeue_request = 0;
4732 mqd->cp_hqd_pq_rptr = 0;
4733 mqd->cp_hqd_pq_wptr = 0;
4735 /* set the pointer to the MQD */
4736 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4737 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4739 /* set MQD vmid to 0 */
4740 tmp = RREG32(mmCP_MQD_CONTROL);
4741 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4742 mqd->cp_mqd_control = tmp;
4744 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4745 hqd_gpu_addr = ring->gpu_addr >> 8;
4746 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4747 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4749 /* set up the HQD, this is similar to CP_RB0_CNTL */
4750 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
/* queue size register value is log2(size in dwords) - 1 */
4751 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4752 (order_base_2(ring->ring_size / 4) - 1));
4753 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4754 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4756 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4758 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4759 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4760 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4762 mqd->cp_hqd_pq_control = tmp;
4764 /* set the wb address whether it's enabled or not */
4765 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4766 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4767 mqd->cp_hqd_pq_rptr_report_addr_hi =
4768 upper_32_bits(wb_gpu_addr) & 0xffff;
4770 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4771 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4772 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4773 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4776 /* enable the doorbell if requested */
4777 if (ring->use_doorbell) {
4778 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4779 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4780 DOORBELL_OFFSET, ring->doorbell_index);
4782 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4784 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4785 DOORBELL_SOURCE, 0);
4786 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4790 mqd->cp_hqd_pq_doorbell_control = tmp;
4792 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4794 mqd->cp_hqd_pq_wptr = ring->wptr;
4795 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4797 /* set the vmid for the queue */
4798 mqd->cp_hqd_vmid = 0;
4800 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4801 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4802 mqd->cp_hqd_persistent_state = tmp;
4804 /* activate the queue */
4805 mqd->cp_hqd_active = 1;
/*
 * gfx_v8_0_kiq_init_register - push the values prepared in @mqd into the
 * CP_HQD_* hardware registers for the currently SRBM-selected queue:
 * disable wptr polling, dequeue any active queue, then program EOP,
 * MQD/HQD base, PQ control, writeback addresses, doorbell range and
 * finally activate the queue.  Caller holds srbm_mutex with the target
 * me/pipe/queue selected.
 */
4810 static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
4812 struct amdgpu_ring *ring)
4817 /* disable wptr polling */
4818 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4819 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4820 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4822 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4823 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4825 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4826 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4828 /* enable doorbell? */
4829 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4831 /* disable the queue if it's active */
4832 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4833 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
/* poll for the HQD to actually deactivate, bounded by usec_timeout */
4834 for (j = 0; j < adev->usec_timeout; j++) {
4835 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4839 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4840 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4841 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4844 /* set the pointer to the MQD */
4845 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4846 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4848 /* set MQD vmid to 0 */
4849 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4851 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4852 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4853 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4855 /* set up the HQD, this is similar to CP_RB0_CNTL */
4856 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4858 /* set the wb address whether it's enabled or not */
4859 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4860 mqd->cp_hqd_pq_rptr_report_addr_lo);
4861 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4862 mqd->cp_hqd_pq_rptr_report_addr_hi);
4864 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4865 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4866 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4868 /* enable the doorbell if requested */
4869 if (ring->use_doorbell) {
/* APU-class parts need the MEC doorbell aperture programmed explicitly */
4870 if ((adev->asic_type == CHIP_CARRIZO) ||
4871 (adev->asic_type == CHIP_FIJI) ||
4872 (adev->asic_type == CHIP_STONEY)) {
4873 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4874 AMDGPU_DOORBELL_KIQ << 2);
4875 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4876 AMDGPU_DOORBELL_MEC_RING7 << 2);
4879 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4881 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4882 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4884 /* set the vmid for the queue */
4885 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4887 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4889 /* activate the queue */
4890 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4892 if (ring->use_doorbell) {
4893 tmp = RREG32(mmCP_PQ_STATUS);
4894 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4895 WREG32(mmCP_PQ_STATUS, tmp);
/*
 * gfx_v8_0_kiq_init_queue - initialize one KIQ or compute queue.  On a
 * clean start the MQD is built from scratch (under srbm_mutex with the
 * queue selected) and backed up; on GPU reset the saved MQD backup is
 * restored, the ring is cleared, and only the registers are reprogrammed.
 * For the KIQ itself the RLC is also told which queue it is; compute
 * queues are mapped through the KIQ ring instead.
 */
4901 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4905 struct amdgpu_device *adev = ring->adev;
4906 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4907 uint64_t eop_gpu_addr;
4908 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
/* KIQ uses the dedicated backup slot at index AMDGPU_MAX_COMPUTE_RINGS */
4909 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4912 eop_gpu_addr = kiq->eop_gpu_addr;
4913 gfx_v8_0_kiq_setting(&kiq->ring);
4915 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4916 ring->queue * MEC_HPD_SIZE;
4917 mqd_idx = ring - &adev->gfx.compute_ring[0];
4920 if (!adev->gfx.in_reset) {
4921 memset((void *)mqd, 0, sizeof(*mqd));
4922 mutex_lock(&adev->srbm_mutex);
4923 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4924 gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4926 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4927 vi_srbm_select(adev, 0, 0, 0, 0);
4928 mutex_unlock(&adev->srbm_mutex);
/* keep a CPU-side copy so the MQD can be restored after a GPU reset */
4930 if (adev->gfx.mec.mqd_backup[mqd_idx])
4931 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4932 } else { /* for GPU_RESET case */
4933 /* reset MQD to a clean status */
4934 if (adev->gfx.mec.mqd_backup[mqd_idx])
4935 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4937 /* reset ring buffer */
4939 amdgpu_ring_clear_ring(ring);
4942 mutex_lock(&adev->srbm_mutex);
4943 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4944 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4945 vi_srbm_select(adev, 0, 0, 0, 0);
4946 mutex_unlock(&adev->srbm_mutex);
4951 gfx_v8_0_kiq_enable(ring);
4953 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
/*
 * gfx_v8_0_kiq_resume - KIQ-based (SR-IOV) resume path: enable the
 * compute engines, initialize the KIQ queue first, then every compute
 * queue (each via a temporary kmap of its MQD BO), and finally run ring
 * tests on all compute rings and the KIQ ring.
 */
4958 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4960 struct amdgpu_ring *ring = NULL;
4963 gfx_v8_0_cp_compute_enable(adev, true);
/* the KIQ queue must be set up before any compute queue can be mapped */
4965 ring = &adev->gfx.kiq.ring;
4966 if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
4967 r = gfx_v8_0_kiq_init_queue(ring,
4968 (struct vi_mqd *)ring->mqd_ptr,
4969 ring->mqd_gpu_addr);
4970 amdgpu_bo_kunmap(ring->mqd_obj);
4971 ring->mqd_ptr = NULL;
4978 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4979 ring = &adev->gfx.compute_ring[i];
4980 if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
4981 r = gfx_v8_0_kiq_init_queue(ring,
4982 (struct vi_mqd *)ring->mqd_ptr,
4983 ring->mqd_gpu_addr);
4984 amdgpu_bo_kunmap(ring->mqd_obj);
4985 ring->mqd_ptr = NULL;
4993 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4994 ring = &adev->gfx.compute_ring[i];
4997 r = amdgpu_ring_test_ring(ring);
4999 ring->ready = false;
5002 ring = &adev->gfx.kiq.ring;
5004 r = amdgpu_ring_test_ring(ring);
5006 ring->ready = false;
/*
 * gfx_v8_0_cp_compute_resume - bare-metal compute resume path.  For each
 * compute ring: create/pin/map its MQD BO on first use, build the MQD
 * in place and program the CP_HQD_* registers directly (under srbm_mutex
 * with the queue selected), then activate the queue.  Afterwards the
 * doorbell aperture is enabled, the compute engines are started and each
 * ring is ring-tested.  The register programming here parallels
 * gfx_v8_0_mqd_init()/gfx_v8_0_kiq_init_register() used on the KIQ path.
 */
5011 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
5015 bool use_doorbell = true;
5023 /* init the queues. */
5024 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5025 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
/* lazily allocate the MQD buffer object the first time through */
5027 if (ring->mqd_obj == NULL) {
5028 r = amdgpu_bo_create(adev,
5029 sizeof(struct vi_mqd),
5031 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
5032 NULL, &ring->mqd_obj);
5034 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
5039 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5040 if (unlikely(r != 0)) {
5041 gfx_v8_0_cp_compute_fini(adev);
5044 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
5047 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
5048 gfx_v8_0_cp_compute_fini(adev);
5051 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
5053 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
5054 gfx_v8_0_cp_compute_fini(adev);
5058 /* init the mqd struct */
5059 memset(buf, 0, sizeof(struct vi_mqd));
5061 mqd = (struct vi_mqd *)buf;
5062 mqd->header = 0xC0310800;
5063 mqd->compute_pipelinestat_enable = 0x00000001;
/* enable all shader engines for static thread management */
5064 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
5065 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
5066 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
5067 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
5068 mqd->compute_misc_reserved = 0x00000003;
/* all CP_HQD_* accesses below target the queue selected via SRBM */
5070 mutex_lock(&adev->srbm_mutex);
5071 vi_srbm_select(adev, ring->me,
5075 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
5078 /* write the EOP addr */
5079 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
5080 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
5082 /* set the VMID assigned */
5083 WREG32(mmCP_HQD_VMID, 0);
5085 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5086 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
5087 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
5088 (order_base_2(MEC_HPD_SIZE / 4) - 1));
5089 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
5091 /* disable wptr polling */
5092 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
5093 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
5094 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* capture what the hardware latched so the MQD matches the registers */
5096 mqd->cp_hqd_eop_base_addr_lo =
5097 RREG32(mmCP_HQD_EOP_BASE_ADDR);
5098 mqd->cp_hqd_eop_base_addr_hi =
5099 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
5101 /* enable doorbell? */
5102 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5104 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5106 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
5108 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
5109 mqd->cp_hqd_pq_doorbell_control = tmp;
5111 /* disable the queue if it's active */
5112 mqd->cp_hqd_dequeue_request = 0;
5113 mqd->cp_hqd_pq_rptr = 0;
5114 mqd->cp_hqd_pq_wptr= 0;
5115 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
5116 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
/* poll for the HQD to deactivate, bounded by usec_timeout */
5117 for (j = 0; j < adev->usec_timeout; j++) {
5118 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
5122 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5123 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5124 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5127 /* set the pointer to the MQD */
5128 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
5129 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5130 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5131 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5133 /* set MQD vmid to 0 */
5134 tmp = RREG32(mmCP_MQD_CONTROL);
5135 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
5136 WREG32(mmCP_MQD_CONTROL, tmp);
5137 mqd->cp_mqd_control = tmp;
5139 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
5140 hqd_gpu_addr = ring->gpu_addr >> 8;
5141 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
5142 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5143 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5144 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5146 /* set up the HQD, this is similar to CP_RB0_CNTL */
5147 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
/* queue size register value is log2(size in dwords) - 1 */
5148 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
5149 (order_base_2(ring->ring_size / 4) - 1));
5150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
5151 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
5153 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
5155 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
5156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
5157 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
5158 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
5159 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
5160 mqd->cp_hqd_pq_control = tmp;
5162 /* set the wb address whether it's enabled or not */
5163 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5164 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
5165 mqd->cp_hqd_pq_rptr_report_addr_hi =
5166 upper_32_bits(wb_gpu_addr) & 0xffff;
5167 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5168 mqd->cp_hqd_pq_rptr_report_addr_lo);
5169 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5170 mqd->cp_hqd_pq_rptr_report_addr_hi);
5172 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5173 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
5174 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
5175 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5176 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5177 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
5178 mqd->cp_hqd_pq_wptr_poll_addr_hi);
5180 /* enable the doorbell if requested */
/* on these ASICs the MEC doorbell aperture must be programmed first */
5182 if ((adev->asic_type == CHIP_CARRIZO) ||
5183 (adev->asic_type == CHIP_FIJI) ||
5184 (adev->asic_type == CHIP_STONEY) ||
5185 (adev->asic_type == CHIP_POLARIS11) ||
5186 (adev->asic_type == CHIP_POLARIS10) ||
5187 (adev->asic_type == CHIP_POLARIS12)) {
5188 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
5189 AMDGPU_DOORBELL_KIQ << 2);
5190 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
5191 AMDGPU_DOORBELL_MEC_RING7 << 2);
5193 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5194 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
5195 DOORBELL_OFFSET, ring->doorbell_index);
5196 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5197 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
5198 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
5199 mqd->cp_hqd_pq_doorbell_control = tmp;
5202 mqd->cp_hqd_pq_doorbell_control = 0;
5204 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
5205 mqd->cp_hqd_pq_doorbell_control);
5207 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5209 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
5210 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5211 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
5213 /* set the vmid for the queue */
5214 mqd->cp_hqd_vmid = 0;
5215 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5217 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
5218 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
5219 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
5220 mqd->cp_hqd_persistent_state = tmp;
/* these ASICs additionally enable GENERIC2 interrupts on ME1 pipe 3 */
5221 if (adev->asic_type == CHIP_STONEY ||
5222 adev->asic_type == CHIP_POLARIS11 ||
5223 adev->asic_type == CHIP_POLARIS10 ||
5224 adev->asic_type == CHIP_POLARIS12) {
5225 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
5226 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
5227 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
5230 /* activate the queue */
5231 mqd->cp_hqd_active = 1;
5232 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5234 vi_srbm_select(adev, 0, 0, 0, 0);
5235 mutex_unlock(&adev->srbm_mutex);
5237 amdgpu_bo_kunmap(ring->mqd_obj);
5238 amdgpu_bo_unreserve(ring->mqd_obj);
5242 tmp = RREG32(mmCP_PQ_STATUS);
5243 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5244 WREG32(mmCP_PQ_STATUS, tmp);
5247 gfx_v8_0_cp_compute_enable(adev, true);
5249 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5250 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5253 r = amdgpu_ring_test_ring(ring);
5255 ring->ready = false;
/*
 * gfx_v8_0_cp_resume - resume the whole command processor: load CP
 * microcode (legacy register upload when no SMU loader, otherwise wait
 * for the SMU to finish each image), then resume the gfx ring and the
 * compute queues (KIQ path under SR-IOV, direct path otherwise).
 * The GUI idle interrupt is masked around the sequence on dGPUs.
 */
5261 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5265 if (!(adev->flags & AMD_IS_APU))
5266 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5268 if (!adev->pp_enabled) {
5269 if (!adev->firmware.smu_load) {
5270 /* legacy firmware loading */
5271 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5275 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU loader path: poll until each CP image is confirmed loaded */
5279 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5280 AMDGPU_UCODE_ID_CP_CE);
5284 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5285 AMDGPU_UCODE_ID_CP_PFP);
5289 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5290 AMDGPU_UCODE_ID_CP_ME);
/* Topaz has no SMU-loadable MEC image: fall back to register upload */
5294 if (adev->asic_type == CHIP_TOPAZ) {
5295 r = gfx_v8_0_cp_compute_load_microcode(adev);
5299 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5300 AMDGPU_UCODE_ID_CP_MEC1);
5307 r = gfx_v8_0_cp_gfx_resume(adev);
5311 if (amdgpu_sriov_vf(adev))
5312 r = gfx_v8_0_kiq_resume(adev);
5314 r = gfx_v8_0_cp_compute_resume(adev);
5318 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* gfx_v8_0_cp_enable - enable/disable both the GFX and compute CPs together */
5323 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5325 gfx_v8_0_cp_gfx_enable(adev, enable);
5326 gfx_v8_0_cp_compute_enable(adev, enable);
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook: apply golden registers,
 * run GPU init, then bring up the RLC and the command processor.
 */
5329 static int gfx_v8_0_hw_init(void *handle)
5332 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5334 gfx_v8_0_init_golden_registers(adev);
5335 gfx_v8_0_gpu_init(adev);
5337 r = gfx_v8_0_rlc_resume(adev);
5341 r = gfx_v8_0_cp_resume(adev);
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook: release the priv reg/inst
 * interrupts, then stop the CP and RLC and tear down the compute MQDs.
 * Under SR-IOV the host owns the hardware, so teardown is skipped.
 * Finally ungate GFX powergating before the block goes down.
 */
5346 static int gfx_v8_0_hw_fini(void *handle)
5348 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5350 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5351 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5352 if (amdgpu_sriov_vf(adev)) {
5353 pr_debug("For SRIOV client, shouldn't do anything.\n");
5356 gfx_v8_0_cp_enable(adev, false);
5357 gfx_v8_0_rlc_stop(adev);
5358 gfx_v8_0_cp_compute_fini(adev);
5360 amdgpu_set_powergating_state(adev,
5361 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
/* gfx_v8_0_suspend - IP-block suspend hook; same teardown as hw_fini */
5366 static int gfx_v8_0_suspend(void *handle)
5368 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5370 return gfx_v8_0_hw_fini(adev);
/* gfx_v8_0_resume - IP-block resume hook; same bring-up as hw_init */
5373 static int gfx_v8_0_resume(void *handle)
5375 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5377 return gfx_v8_0_hw_init(adev);
/* gfx_v8_0_is_idle - report idleness from GRBM_STATUS.GUI_ACTIVE */
5380 static bool gfx_v8_0_is_idle(void *handle)
5382 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5384 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/*
 * gfx_v8_0_wait_for_idle - poll gfx_v8_0_is_idle() up to
 * adev->usec_timeout iterations waiting for the GFX block to go idle.
 */
5390 static int gfx_v8_0_wait_for_idle(void *handle)
5393 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5395 for (i = 0; i < adev->usec_timeout; i++) {
5396 if (gfx_v8_0_is_idle(handle))
/*
 * gfx_v8_0_check_soft_reset - inspect GRBM_STATUS/GRBM_STATUS2/
 * SRBM_STATUS busy bits and build the GRBM/SRBM soft-reset masks that
 * would clear the hung units.  The masks are cached in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks.
 */
5404 static bool gfx_v8_0_check_soft_reset(void *handle)
5406 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5407 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* any busy graphics pipeline unit -> reset CP, GFX and the GRBM itself */
5411 tmp = RREG32(mmGRBM_STATUS);
5412 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5413 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5414 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5415 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5416 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5417 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5418 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5419 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5420 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5421 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5422 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5423 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5424 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5428 tmp = RREG32(mmGRBM_STATUS2);
5429 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5430 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5431 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
/* CP fetcher/compute/gfx engine busy -> reset the CP sub-blocks too */
5433 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5434 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5435 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5436 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5438 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5440 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5442 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5443 SOFT_RESET_GRBM, 1);
5447 tmp = RREG32(mmSRBM_STATUS);
5448 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5449 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5450 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5451 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5452 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5453 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5455 if (grbm_soft_reset || srbm_soft_reset) {
5456 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5457 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5460 adev->gfx.grbm_soft_reset = 0;
5461 adev->gfx.srbm_soft_reset = 0;
/*
 * gfx_v8_0_inactive_hqd - select @ring via SRBM and, if its HQD is
 * active, issue a dequeue request and poll (bounded by usec_timeout)
 * until the queue deactivates.  Used before a soft reset.
 */
5466 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5467 struct amdgpu_ring *ring)
5471 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5472 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5474 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5475 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5477 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5478 for (i = 0; i < adev->usec_timeout; i++) {
5479 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset:
 * stop the RLC, disable the GFX CP if CP/GFX is being reset, and for a
 * CP/CPF/CPC/CPG reset drain every compute HQD and halt the MEC.  Uses
 * the reset masks cached by gfx_v8_0_check_soft_reset(); no-op when
 * neither mask is set.
 */
5486 static int gfx_v8_0_pre_soft_reset(void *handle)
5488 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5489 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5491 if ((!adev->gfx.grbm_soft_reset) &&
5492 (!adev->gfx.srbm_soft_reset))
5495 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5496 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5499 gfx_v8_0_rlc_stop(adev);
5501 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5502 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5503 /* Disable GFX parsing/prefetching */
5504 gfx_v8_0_cp_gfx_enable(adev, false);
5506 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5507 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5508 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5509 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5512 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5513 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5515 gfx_v8_0_inactive_hqd(adev, ring);
5517 /* Disable MEC parsing/prefetching */
5518 gfx_v8_0_cp_compute_enable(adev, false);
/*
 * gfx_v8_0_soft_reset - perform the actual soft reset using the masks
 * cached by gfx_v8_0_check_soft_reset(): stall/clear GFX via
 * GMCON_DEBUG, pulse the requested bits in GRBM_SOFT_RESET and
 * SRBM_SOFT_RESET (set, read back, clear, read back), then release the
 * GMCON stall.  No-op when neither mask is set.
 */
5524 static int gfx_v8_0_soft_reset(void *handle)
5526 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5527 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5530 if ((!adev->gfx.grbm_soft_reset) &&
5531 (!adev->gfx.srbm_soft_reset))
5534 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5535 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stall the memory-controller side of GFX while the reset is asserted */
5537 if (grbm_soft_reset || srbm_soft_reset) {
5538 tmp = RREG32(mmGMCON_DEBUG);
5539 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5540 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5541 WREG32(mmGMCON_DEBUG, tmp);
5545 if (grbm_soft_reset) {
5546 tmp = RREG32(mmGRBM_SOFT_RESET);
5547 tmp |= grbm_soft_reset;
5548 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5549 WREG32(mmGRBM_SOFT_RESET, tmp);
/* read back to make sure the write has landed before deasserting */
5550 tmp = RREG32(mmGRBM_SOFT_RESET);
5554 tmp &= ~grbm_soft_reset;
5555 WREG32(mmGRBM_SOFT_RESET, tmp);
5556 tmp = RREG32(mmGRBM_SOFT_RESET);
5559 if (srbm_soft_reset) {
5560 tmp = RREG32(mmSRBM_SOFT_RESET);
5561 tmp |= srbm_soft_reset;
5562 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5563 WREG32(mmSRBM_SOFT_RESET, tmp);
5564 tmp = RREG32(mmSRBM_SOFT_RESET);
5568 tmp &= ~srbm_soft_reset;
5569 WREG32(mmSRBM_SOFT_RESET, tmp);
5570 tmp = RREG32(mmSRBM_SOFT_RESET);
5573 if (grbm_soft_reset || srbm_soft_reset) {
5574 tmp = RREG32(mmGMCON_DEBUG);
5575 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5576 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5577 WREG32(mmGMCON_DEBUG, tmp);
5580 /* Wait a little for things to settle down */
/*
 * gfx_v8_0_init_hqd - clear @ring's HQD state after a soft reset.
 *
 * Selects the ring's me/pipe/queue, zeroes the dequeue request and the PQ
 * read/write pointers, then restores SRBM selection to 0/0/0/0.
 */
5586 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5587 struct amdgpu_ring *ring)
5589 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5590 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5591 WREG32(mmCP_HQD_PQ_RPTR, 0);
5592 WREG32(mmCP_HQD_PQ_WPTR, 0);
5593 vi_srbm_select(adev, 0, 0, 0, 0);
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset.
 *
 * Mirror of pre_soft_reset: early-outs when no reset bits were latched,
 * resumes the gfx CP when CP/GFX was reset, reinitializes every compute HQD
 * and resumes the MEC when CP/CPF/CPC/CPG was reset, then restarts the RLC.
 * Returns 0 (ip_funcs .post_soft_reset signature).
 */
5596 static int gfx_v8_0_post_soft_reset(void *handle)
5598 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5599 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5601 if ((!adev->gfx.grbm_soft_reset) &&
5602 (!adev->gfx.srbm_soft_reset))
5605 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5606 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5608 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5609 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5610 gfx_v8_0_cp_gfx_resume(adev);
5612 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5613 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5614 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5615 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
/* re-zero each compute ring's HQD before restarting the MEC */
5618 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5619 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5621 gfx_v8_0_init_hqd(adev, ring);
5623 gfx_v8_0_cp_compute_resume(adev);
5625 gfx_v8_0_rlc_start(adev);
5631 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5633 * @adev: amdgpu_device pointer
5635 * Fetches a GPU clock counter snapshot.
5636 * Returns the 64 bit clock counter snapshot.
5638 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* serialize capture: the RLC latch + two 32-bit reads must not interleave */
5642 mutex_lock(&adev->gfx.gpu_clock_mutex);
5643 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5644 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5645 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5646 mutex_unlock(&adev->gfx.gpu_clock_mutex);
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA base+size programming for a VMID.
 *
 * Converts byte-based base/size values to hardware units via the AMDGPU_*_SHIFT
 * constants, then emits four WRITE_DATA packets programming the per-VMID GDS
 * memory base, GDS memory size, GWS (base|size), and OA mask registers.
 */
5650 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5652 uint32_t gds_base, uint32_t gds_size,
5653 uint32_t gws_base, uint32_t gws_size,
5654 uint32_t oa_base, uint32_t oa_size)
/* scale byte quantities into hardware allocation units */
5656 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5657 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5659 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5660 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5662 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5663 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS base */
5666 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5667 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5668 WRITE_DATA_DST_SEL(0)));
5669 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5670 amdgpu_ring_write(ring, 0);
5671 amdgpu_ring_write(ring, gds_base);
/* GDS size */
5674 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5675 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5676 WRITE_DATA_DST_SEL(0)));
5677 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5678 amdgpu_ring_write(ring, 0);
5679 amdgpu_ring_write(ring, gds_size);
/* GWS: size in the SIZE field, base in the low bits */
5682 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5684 WRITE_DATA_DST_SEL(0)));
5685 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5686 amdgpu_ring_write(ring, 0);
5687 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
5690 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5691 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5692 WRITE_DATA_DST_SEL(0)));
5693 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5694 amdgpu_ring_write(ring, 0);
5695 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * wave_read_ind - read one indirect SQ register for a given simd/wave.
 * Programs SQ_IND_INDEX (with FORCE_READ) and returns SQ_IND_DATA.
 */
5698 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5700 WREG32(mmSQ_IND_INDEX,
5701 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5702 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5703 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5704 (SQ_IND_INDEX__FORCE_READ_MASK));
5705 return RREG32(mmSQ_IND_DATA);
/*
 * wave_read_regs - bulk-read @num consecutive indirect SQ registers into @out.
 *
 * Sets AUTO_INCR so repeated SQ_IND_DATA reads walk the index starting at
 * @regno for the given simd/wave/thread.
 * NOTE(review): the loop header around the SQ_IND_DATA read is elided in this
 * excerpt.
 */
5708 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5709 uint32_t wave, uint32_t thread,
5710 uint32_t regno, uint32_t num, uint32_t *out)
5712 WREG32(mmSQ_IND_INDEX,
5713 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5714 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5715 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5716 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5717 (SQ_IND_INDEX__FORCE_READ_MASK) |
5718 (SQ_IND_INDEX__AUTO_INCR_MASK))&#59;
5720 *(out++) = RREG32(mmSQ_IND_DATA);
/*
 * gfx_v8_0_read_wave_data - snapshot a wave's SQ state registers into @dst.
 *
 * Appends a fixed set of SQ_WAVE_* registers (status, PC, EXEC, HW_ID,
 * instruction DWs, allocations, trap status, TBA/TMA, debug, M0) to @dst,
 * advancing *no_fields for each entry. Entry 0 is a placeholder (wave type 0).
 */
5723 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5725 /* type 0 wave data */
5726 dst[(*no_fields)++] = 0;
5727 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5728 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5729 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5730 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5731 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5732 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5733 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5740 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5741 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5742 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5743 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5744 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/*
 * gfx_v8_0_read_wave_sgprs - read @size SGPRs (from @start) of a wave into @dst.
 * Thin wrapper around wave_read_regs at the SGPR indirect-register window.
 */
5747 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5748 uint32_t wave, uint32_t start,
5749 uint32_t size, uint32_t *dst)
5752 adev, simd, wave, 0,
5753 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* gfx8 hooks exposed to the core driver (clock counter, SE/SH select, wave debug). */
5757 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5758 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5759 .select_se_sh = &gfx_v8_0_select_se_sh,
5760 .read_wave_data = &gfx_v8_0_read_wave_data,
5761 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
/*
 * gfx_v8_0_early_init - ip_funcs .early_init: set ring counts and wire up the
 * gfx/ring/irq/gds/rlc function tables before any hardware touch.
 */
5764 static int gfx_v8_0_early_init(void *handle)
5766 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5768 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5769 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5770 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5771 gfx_v8_0_set_ring_funcs(adev);
5772 gfx_v8_0_set_irq_funcs(adev);
5773 gfx_v8_0_set_gds_init(adev);
5774 gfx_v8_0_set_rlc_funcs(adev);
/*
 * gfx_v8_0_late_init - ip_funcs .late_init: enable priv reg/inst interrupts,
 * run the EDC GPR workarounds (needs the IB pool, hence late init), then gate
 * GFX power. Error-return paths are elided in this excerpt.
 */
5779 static int gfx_v8_0_late_init(void *handle)
5781 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5784 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5788 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5792 /* requires IBs so do in late init after IB pool is initialized */
5793 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5797 amdgpu_set_powergating_state(adev,
5798 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
/*
 * gfx_v8_0_enable_gfx_static_mg_power_gating - toggle static per-CU power gating.
 * On Polaris11/12 the request goes to the SMU via powerplay; the RLC_PG_CNTL
 * STATIC_PER_CU_PG_ENABLE bit is programmed directly as well.
 */
5803 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5806 if ((adev->asic_type == CHIP_POLARIS11) ||
5807 (adev->asic_type == CHIP_POLARIS12))
5808 /* Send msg to SMU via Powerplay */
5809 amdgpu_set_powergating_state(adev,
5810 AMD_IP_BLOCK_TYPE_SMC,
5812 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5814 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle dynamic per-CU power gating via RLC_PG_CNTL. */
5817 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5820 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Toggle Polaris11 "quick" power gating via RLC_PG_CNTL. */
5823 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5826 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Carrizo: toggle coarse GFX power gating via RLC_PG_CNTL. */
5829 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5832 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/*
 * Carrizo: toggle GFX pipeline power gating, then read a GFX register so the
 * block wakes up and latches the change.
 */
5835 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5838 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5840 /* Read any GFX register to wake up GFX. */
5842 RREG32(mmDB_RENDER_CONTROL);
/*
 * Carrizo: enable CG power gating (and pipeline PG when supported) if @enable
 * and the GFX_PG flag is set; otherwise disable both unconditionally.
 */
5845 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5848 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5849 cz_enable_gfx_cg_power_gating(adev, true);
5850 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5851 cz_enable_gfx_pipeline_power_gating(adev, true);
5853 cz_enable_gfx_cg_power_gating(adev, false);
5854 cz_enable_gfx_pipeline_power_gating(adev, false);
/*
 * gfx_v8_0_set_powergating_state - ip_funcs .set_powergating_state.
 *
 * Per-ASIC dispatch: the Carrizo-family path handles SCK slowdown, CP power
 * gating and coarse CG power gating plus static/dynamic MG PG; Polaris11/12
 * handle static/dynamic MG PG and quick MG PG. No-op under SR-IOV.
 * NOTE(review): the switch's case labels for the first (Carrizo-family) arm
 * and the break/default/return lines are elided in this excerpt.
 */
5858 static int gfx_v8_0_set_powergating_state(void *handle,
5859 enum amd_powergating_state state)
5861 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5862 bool enable = (state == AMD_PG_STATE_GATE);
/* power gating is handled by the host under virtualization */
5864 if (amdgpu_sriov_vf(adev))
5867 switch (adev->asic_type) {
5871 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5872 cz_enable_sck_slow_down_on_power_up(adev, true);
5873 cz_enable_sck_slow_down_on_power_down(adev, true);
5875 cz_enable_sck_slow_down_on_power_up(adev, false);
5876 cz_enable_sck_slow_down_on_power_down(adev, false);
5878 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5879 cz_enable_cp_power_gating(adev, true);
5881 cz_enable_cp_power_gating(adev, false);
5883 cz_update_gfx_cg_power_gating(adev, enable);
5885 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5886 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5888 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5890 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5891 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5893 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5895 case CHIP_POLARIS11:
5896 case CHIP_POLARIS12:
5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5898 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5900 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5902 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5903 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5905 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5907 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5908 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5910 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/*
 * gfx_v8_0_get_clockgating_state - report which GFX clock-gating features are
 * currently active by OR-ing AMD_CG_SUPPORT_* bits into @flags, derived from
 * live register reads. No-op under SR-IOV.
 */
5919 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5921 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5924 if (amdgpu_sriov_vf(adev))
5927 /* AMD_CG_SUPPORT_GFX_MGCG */
5928 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5929 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5930 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5932 /* AMD_CG_SUPPORT_GFX_CGCG */
5933 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5934 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5935 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5937 /* AMD_CG_SUPPORT_GFX_CGLS */
5938 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5939 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5941 /* AMD_CG_SUPPORT_GFX_CGTS */
5942 data = RREG32(mmCGTS_SM_CTRL_REG);
5943 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5944 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5946 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5947 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5948 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5950 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5951 data = RREG32(mmRLC_MEM_SLP_CNTL);
5952 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5953 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5955 /* AMD_CG_SUPPORT_GFX_CP_LS */
5956 data = RREG32(mmCP_MEM_SLP_CNTL);
5957 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5958 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CUs.
 *
 * Selects all SE/SH, enables every CU/non-CU serdes master, then builds
 * RLC_SERDES_WR_CTRL: clear the command/select/override fields (Stoney keeps
 * BPM_DATA/REG_ADDR untouched, hence the smaller mask), and write @cmd into
 * BPM_DATA, @reg_addr into REG_ADDR, with the reserved BPM address selected.
 */
5961 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5962 uint32_t reg_addr, uint32_t cmd)
5966 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5968 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5969 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5971 data = RREG32(mmRLC_SERDES_WR_CTRL);
5972 if (adev->asic_type == CHIP_STONEY)
5973 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5974 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5975 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5976 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5977 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5978 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5979 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5980 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5981 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5983 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5984 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5985 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5986 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5987 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5988 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5989 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5990 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5991 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5992 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5993 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5994 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5995 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5996 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5997 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5999 WREG32(mmRLC_SERDES_WR_CTRL, data);
6002 #define MSG_ENTER_RLC_SAFE_MODE 1
6003 #define MSG_EXIT_RLC_SAFE_MODE 0
6004 #define RLC_GPR_REG2__REQ_MASK 0x00000001
6005 #define RLC_GPR_REG2__REQ__SHIFT 0
6006 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
6007 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/*
 * iceland_enter_rlc_safe_mode - ask the RLC to enter safe mode before CG/PG
 * register programming.
 *
 * No-ops when the RLC F32 core is disabled. When CGCG/MGCG is supported, sends
 * the safe-mode CMD+MESSAGE(1) handshake via RLC_SAFE_MODE, waits for the GPM
 * clock/power status bits, then waits for the CMD bit to auto-clear, and marks
 * in_safe_mode so the exit path knows to undo it.
 */
6009 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
6014 data = RREG32(mmRLC_CNTL);
6015 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6018 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6019 data |= RLC_SAFE_MODE__CMD_MASK;
6020 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6021 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
6022 WREG32(mmRLC_SAFE_MODE, data);
/* wait for both GFX clock and power status to report up */
6024 for (i = 0; i < adev->usec_timeout; i++) {
6025 if ((RREG32(mmRLC_GPM_STAT) &
6026 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6027 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
6028 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6029 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the RLC to acknowledge (CMD bit clears) */
6034 for (i = 0; i < adev->usec_timeout; i++) {
6035 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6039 adev->gfx.rlc.in_safe_mode = true;
/*
 * iceland_exit_rlc_safe_mode - counterpart of enter_safe_mode: sends the
 * exit-safe-mode handshake (CMD with MESSAGE cleared) if we previously
 * entered, clears in_safe_mode, and waits for the CMD bit to auto-clear.
 * No-ops when the RLC F32 core is disabled.
 */
6043 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
6048 data = RREG32(mmRLC_CNTL);
6049 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6052 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6053 if (adev->gfx.rlc.in_safe_mode) {
6054 data |= RLC_SAFE_MODE__CMD_MASK;
6055 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6056 WREG32(mmRLC_SAFE_MODE, data);
6057 adev->gfx.rlc.in_safe_mode = false;
/* wait for the RLC to acknowledge (CMD bit clears) */
6061 for (i = 0; i < adev->usec_timeout; i++) {
6062 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/* RLC safe-mode hooks used by the clock-gating update paths below. */
6068 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
6069 .enter_safe_mode = iceland_enter_rlc_safe_mode,
6070 .exit_safe_mode = iceland_exit_rlc_safe_mode
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG (+MGLS/CGTS).
 *
 * Runs entirely inside RLC safe mode. Enable path: turn on RLC/CP memory light
 * sleep (if supported), clear the MGCG override bits, sync serdes, clear the
 * BPM MGCG override, and optionally configure CGTS tree-shade gating. Disable
 * path: restore the overrides, turn off MGLS in RLC and CP, force the CGTS
 * overrides on, and set the BPM MGCG override. Register writes are skipped
 * when the value is unchanged (the temp/data compare lines are elided here).
 */
6073 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
6076 uint32_t temp, data;
6078 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6080 /* It is disabled by HW by default */
6081 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
6082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6083 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
6084 /* 1 - RLC memory Light sleep */
6085 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
/* 2 - CP memory Light sleep */
6087 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6088 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
6091 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
6092 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6093 if (adev->flags & AMD_IS_APU)
6094 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6095 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6096 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6098 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6099 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6100 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6101 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6104 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6106 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6107 gfx_v8_0_wait_for_rlc_serdes(adev);
6109 /* 5 - clear mgcg override */
6110 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6112 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
6113 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
6114 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6115 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6116 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6117 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6118 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6119 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6120 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6121 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6122 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6123 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6125 WREG32(mmCGTS_SM_CTRL_REG, data);
6129 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6130 gfx_v8_0_wait_for_rlc_serdes(adev);
6132 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6133 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6134 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6135 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6136 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6137 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6139 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6141 /* 2 - disable MGLS in RLC */
6142 data = RREG32(mmRLC_MEM_SLP_CNTL);
6143 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6144 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6145 WREG32(mmRLC_MEM_SLP_CNTL, data);
6148 /* 3 - disable MGLS in CP */
6149 data = RREG32(mmCP_MEM_SLP_CNTL);
6150 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6151 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6152 WREG32(mmCP_MEM_SLP_CNTL, data);
6155 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6156 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6157 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6158 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6160 WREG32(mmCGTS_SM_CTRL_REG, data);
6162 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6163 gfx_v8_0_wait_for_rlc_serdes(adev);
6165 /* 6 - set mgcg override */
6166 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6170 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6171 gfx_v8_0_wait_for_rlc_serdes(adev);
6174 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG (+CGLS).
 *
 * Runs entirely inside RLC safe mode. Enable path: clear the CGCG (and, when
 * CGLS is supported, CGLS) override bits, sync serdes, clear the BPM CGCG
 * override, set CGLS via BPM, and set CGCG_EN (+CGLS_EN) in
 * RLC_CGCG_CGLS_CTRL, finishing with GUI idle interrupts on. Disable path:
 * turn GUI idle interrupts off, restore both overrides, wake CGCG with dummy
 * register reads, set the BPM CGCG override, clear CGLS via BPM, and clear
 * CGCG_EN/CGLS_EN.
 */
6177 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6180 uint32_t temp, temp1, data, data1;
6182 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6184 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6186 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6187 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6188 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6190 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6192 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6193 gfx_v8_0_wait_for_rlc_serdes(adev);
6195 /* 2 - clear cgcg override */
6196 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6198 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6199 gfx_v8_0_wait_for_rlc_serdes(adev);
6201 /* 3 - write cmd to set CGLS */
6202 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6204 /* 4 - enable cgcg */
6205 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6207 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
/* enable cgls */
6209 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6211 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6212 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6215 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6217 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6221 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6223 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6224 * Cmp_busy/GFX_Idle interrupts
6226 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6228 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6229 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
/* force both CGCG and CGLS overrides back on */
6232 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6233 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6234 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6236 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6238 /* read gfx register to wake up cgcg */
6239 RREG32(mmCB_CGTT_SCLK_CTRL);
6240 RREG32(mmCB_CGTT_SCLK_CTRL);
6241 RREG32(mmCB_CGTT_SCLK_CTRL);
6242 RREG32(mmCB_CGTT_SCLK_CTRL);
6244 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6245 gfx_v8_0_wait_for_rlc_serdes(adev);
6247 /* write cmd to Set CGCG Override */
6248 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6250 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6251 gfx_v8_0_wait_for_rlc_serdes(adev);
6253 /* write cmd to Clear CGLS */
6254 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6256 /* disable cgcg, cgls should be disabled too. */
6257 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6258 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6260 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6263 gfx_v8_0_wait_for_rlc_serdes(adev);
6265 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * gfx_v8_0_update_gfx_clock_gating - order MGCG and CGCG updates correctly:
 * when enabling, MGCG first then CGCG; when disabling, CGCG first then MGCG.
 * (The enable/disable branch lines are elided in this excerpt.)
 */
6267 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6271 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6272 * === MGCG + MGLS + TS(CG/LS) ===
6274 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6275 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6277 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6278 * === CGCG + CGLS ===
6280 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6281 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/*
 * gfx_v8_0_tonga_update_gfx_clock_gating - Tonga routes clock gating through
 * the SMU: build a PP_CG_MSG_ID for the CG (CGCG/CGLS) group and the MG
 * (MGCG/MGLS) group from cg_flags and the gate/ungate request, then submit
 * each via amd_set_clockgating_by_smu.
 * NOTE(review): the msg-id argument lines and ungate pp_state reset are elided
 * in this excerpt.
 */
6286 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6287 enum amd_clockgating_state state)
6289 uint32_t msg_id, pp_state = 0;
6290 uint32_t pp_support_state = 0;
6291 void *pp_handle = adev->powerplay.pp_handle;
6293 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6294 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6295 pp_support_state = PP_STATE_SUPPORT_LS;
6296 pp_state = PP_STATE_LS;
6298 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6299 pp_support_state |= PP_STATE_SUPPORT_CG;
6300 pp_state |= PP_STATE_CG;
6302 if (state == AMD_CG_STATE_UNGATE)
6305 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6309 amd_set_clockgating_by_smu(pp_handle, msg_id);
6312 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6313 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6314 pp_support_state = PP_STATE_SUPPORT_LS;
6315 pp_state = PP_STATE_LS;
6318 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6319 pp_support_state |= PP_STATE_SUPPORT_CG;
6320 pp_state |= PP_STATE_CG;
6323 if (state == AMD_CG_STATE_UNGATE)
6326 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6330 amd_set_clockgating_by_smu(pp_handle, msg_id);
/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - Polaris SMU clock-gating update.
 *
 * Like the Tonga variant but with three extra groups: 3D CGCG/CGLS, RLC light
 * sleep, and CP light sleep. Each supported group is encoded into a
 * PP_CG_MSG_ID and submitted via amd_set_clockgating_by_smu.
 * NOTE(review): the msg-id argument lines and ungate pp_state reset are elided
 * in this excerpt.
 */
6336 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6337 enum amd_clockgating_state state)
6340 uint32_t msg_id, pp_state = 0;
6341 uint32_t pp_support_state = 0;
6342 void *pp_handle = adev->powerplay.pp_handle;
/* CGCG / CGLS group */
6344 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6345 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6346 pp_support_state = PP_STATE_SUPPORT_LS;
6347 pp_state = PP_STATE_LS;
6349 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6350 pp_support_state |= PP_STATE_SUPPORT_CG;
6351 pp_state |= PP_STATE_CG;
6353 if (state == AMD_CG_STATE_UNGATE)
6356 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6360 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* 3D CGCG / CGLS group */
6363 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6364 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6365 pp_support_state = PP_STATE_SUPPORT_LS;
6366 pp_state = PP_STATE_LS;
6368 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6369 pp_support_state |= PP_STATE_SUPPORT_CG;
6370 pp_state |= PP_STATE_CG;
6372 if (state == AMD_CG_STATE_UNGATE)
6375 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6379 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* MGCG / MGLS group */
6382 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6383 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6384 pp_support_state = PP_STATE_SUPPORT_LS;
6385 pp_state = PP_STATE_LS;
6388 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6389 pp_support_state |= PP_STATE_SUPPORT_CG;
6390 pp_state |= PP_STATE_CG;
6393 if (state == AMD_CG_STATE_UNGATE)
6396 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6400 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* RLC light-sleep group */
6403 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6404 pp_support_state = PP_STATE_SUPPORT_LS;
6406 if (state == AMD_CG_STATE_UNGATE)
6409 pp_state = PP_STATE_LS;
6411 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6415 amd_set_clockgating_by_smu(pp_handle, msg_id);
/* CP light-sleep group */
6418 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6419 pp_support_state = PP_STATE_SUPPORT_LS;
6421 if (state == AMD_CG_STATE_UNGATE)
6424 pp_state = PP_STATE_LS;
6425 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6429 amd_set_clockgating_by_smu(pp_handle, msg_id);
/*
 * gfx_v8_0_set_clockgating_state - ip_funcs .set_clockgating_state dispatcher.
 * No-op under SR-IOV; otherwise routes per ASIC to the direct-register path,
 * the Tonga SMU path, or the Polaris SMU path. (Case labels for the first arm
 * and the break/default lines are elided in this excerpt.)
 */
6435 static int gfx_v8_0_set_clockgating_state(void *handle,
6436 enum amd_clockgating_state state)
6438 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6440 if (amdgpu_sriov_vf(adev))
6443 switch (adev->asic_type) {
6447 gfx_v8_0_update_gfx_clock_gating(adev,
6448 state == AMD_CG_STATE_GATE);
6451 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6453 case CHIP_POLARIS10:
6454 case CHIP_POLARIS11:
6455 case CHIP_POLARIS12:
6456 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
/* Read pointer comes from the ring's writeback slot. */
6464 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6466 return ring->adev->wb.wb[ring->rptr_offs];
/*
 * GFX write pointer: from the writeback slot when the ring uses a doorbell,
 * otherwise directly from CP_RB0_WPTR.
 */
6469 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6471 struct amdgpu_device *adev = ring->adev;
6473 if (ring->use_doorbell)
6474 /* XXX check if swapping is necessary on BE */
6475 return ring->adev->wb.wb[ring->wptr_offs];
6477 return RREG32(mmCP_RB0_WPTR);
/*
 * GFX write pointer update: via writeback slot + doorbell when available,
 * otherwise CP_RB0_WPTR (with a read-back to post the write).
 */
6480 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6482 struct amdgpu_device *adev = ring->adev;
6484 if (ring->use_doorbell) {
6485 /* XXX check if swapping is necessary on BE */
6486 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6487 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6489 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6490 (void)RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a WAIT_REG_MEM that requests an HDP
 * flush and waits for GPU_HDP_FLUSH_DONE to match. The ref/mask bit depends
 * on which CP client the ring is (compute/KIQ pick a per-pipe CP bit; the gfx
 * ring uses CP0 on the PFP engine).
 * NOTE(review): the me==1 pipe-select branch lines are elided in this excerpt.
 */
6494 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6496 u32 ref_and_mask, reg_mem_engine;
6498 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6499 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6502 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6505 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6512 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6513 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6516 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6517 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6518 WAIT_REG_MEM_FUNCTION(3) | /* == */
6520 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6521 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6522 amdgpu_ring_write(ring, ref_and_mask);
6523 amdgpu_ring_write(ring, ref_and_mask);
6524 amdgpu_ring_write(ring, 0x20); /* poll interval */
/* Emit VS_PARTIAL_FLUSH followed by VGT_FLUSH event-write packets. */
6527 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6529 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6530 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6533 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6534 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
/* Invalidate the HDP cache by writing 1 to HDP_DEBUG0 via WRITE_DATA. */
6539 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6541 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6542 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6543 WRITE_DATA_DST_SEL(0) |
6545 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6546 amdgpu_ring_write(ring, 0);
6547 amdgpu_ring_write(ring, 1);
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring.
 * CE IBs use INDIRECT_BUFFER_CONST; others use INDIRECT_BUFFER. The control
 * dword packs the IB length with the VMID in bits 24+.
 */
6551 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6552 struct amdgpu_ib *ib,
6553 unsigned vm_id, bool ctx_switch)
6555 u32 header, control = 0;
6557 if (ib->flags & AMDGPU_IB_FLAG_CE)
6558 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6560 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6562 control |= ib->length_dw | (vm_id << 24);
6564 amdgpu_ring_write(ring, header);
6565 amdgpu_ring_write(ring,
/* IB base must be 4-byte aligned; low bits carried elsewhere */
6569 (ib->gpu_addr & 0xFFFFFFFC));
6570 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6571 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring:
 * always INDIRECT_BUFFER with VALID set, IB length and VMID in the control word.
 */
6574 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6575 struct amdgpu_ib *ib,
6576 unsigned vm_id, bool ctx_switch)
6578 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6580 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6581 amdgpu_ring_write(ring,
6585 (ib->gpu_addr & 0xFFFFFFFC));
6586 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6587 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit an EVENT_WRITE_EOP fence on the gfx ring.
 * Flushes TC/TCL1 caches, writes @seq (64-bit when FLAG_64BIT) to @addr, and
 * optionally raises an interrupt (FLAG_INT).
 */
6590 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6591 u64 seq, unsigned flags)
6593 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6594 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6596 /* EVENT_WRITE_EOP - flush caches, send int */
6597 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6598 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6600 EOP_TC_WB_ACTION_EN |
6601 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6603 amdgpu_ring_write(ring, addr & 0xfffffffc);
6604 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6605 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6606 amdgpu_ring_write(ring, lower_32_bits(seq));
6607 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_ring_emit_pipeline_sync - emit a WAIT_REG_MEM that blocks until
 * the ring's fence memory equals the latest sync sequence number. Gfx rings
 * wait on the PFP engine, compute rings on ME.
 */
6611 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6613 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6614 uint32_t seq = ring->fence_drv.sync_seq;
6615 uint64_t addr = ring->fence_drv.gpu_addr;
6617 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6618 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6619 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6620 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6621 amdgpu_ring_write(ring, addr & 0xfffffffc);
6622 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6623 amdgpu_ring_write(ring, seq);
6624 amdgpu_ring_write(ring, 0xffffffff);
6625 amdgpu_ring_write(ring, 4); /* poll interval */
/*
 * gfx_v8_0_ring_emit_vm_flush - program a VM's page-table base and flush its
 * TLB from the ring.
 *
 * Writes pd_addr>>12 into the context's PAGE_TABLE_BASE_ADDR register
 * (contexts 0-7 and 8-15 live in different register ranges), requests an
 * invalidate for the VMID's bit, then busy-waits on VM_INVALIDATE_REQUEST via
 * WAIT_REG_MEM. Gfx rings additionally sync PFP to ME and pad with 128 NOPs
 * so the CE cannot touch VM state before the flush completes.
 * NOTE(review): the vm_id<8 branch line is elided in this excerpt.
 */
6628 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6629 unsigned vm_id, uint64_t pd_addr)
6631 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6633 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6634 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6635 WRITE_DATA_DST_SEL(0)) |
6638 amdgpu_ring_write(ring,
6639 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6641 amdgpu_ring_write(ring,
6642 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6644 amdgpu_ring_write(ring, 0);
6645 amdgpu_ring_write(ring, pd_addr >> 12);
6647 /* bits 0-15 are the VM contexts0-15 */
6648 /* invalidate the cache */
6649 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6650 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6651 WRITE_DATA_DST_SEL(0)));
6652 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6653 amdgpu_ring_write(ring, 0);
6654 amdgpu_ring_write(ring, 1 << vm_id);
6656 /* wait for the invalidate to complete */
6657 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6658 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6659 WAIT_REG_MEM_FUNCTION(0) | /* always */
6660 WAIT_REG_MEM_ENGINE(0))); /* me */
6661 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6662 amdgpu_ring_write(ring, 0);
6663 amdgpu_ring_write(ring, 0); /* ref */
6664 amdgpu_ring_write(ring, 0); /* mask */
6665 amdgpu_ring_write(ring, 0x20); /* poll interval */
6667 /* compute doesn't have PFP */
6669 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6670 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6671 amdgpu_ring_write(ring, 0x0);
6672 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6673 amdgpu_ring_insert_nop(ring, 128);
/* Read the compute ring's write pointer from its writeback slot. */
6677 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6679 return ring->adev->wb.wb[ring->wptr_offs];
/* Publish the compute ring's write pointer: mirror it into the writeback
 * slot and ring the doorbell so the CP picks up the new wptr.
 */
6682 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6684 struct amdgpu_device *adev = ring->adev;
6686 /* XXX check if swapping is necessary on BE */
6687 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6688 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
/* Emit a fence on a compute ring using RELEASE_MEM (the MEC equivalent of
 * EVENT_WRITE_EOP): flush caches, write @seq to @addr and optionally
 * raise an interrupt.
 */
6691 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6695 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6696 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6698 /* RELEASE_MEM - flush caches, send int */
6699 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6700 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6702 EOP_TC_WB_ACTION_EN |
6703 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL: 2 = 64bit seq, 1 = 32bit seq; INT_SEL: 2 = int on write confirm */
6705 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6706 amdgpu_ring_write(ring, addr & 0xfffffffc);
6707 amdgpu_ring_write(ring, upper_32_bits(addr));
6708 amdgpu_ring_write(ring, lower_32_bits(seq));
6709 amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a fence on the KIQ ring.  KIQ fences are always 32 bit: the seq is
 * written to @addr with WRITE_DATA, and if an interrupt is requested a
 * second WRITE_DATA pokes CPC_INT_STATUS to raise GENERIC2_INT (src_id 178).
 */
6712 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6713 u64 seq, unsigned int flags)
6715 /* we only allocate 32bit for each seq wb address */
6716 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6718 /* write fence seq to the "addr" */
6719 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6720 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6721 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6722 amdgpu_ring_write(ring, lower_32_bits(addr));
6723 amdgpu_ring_write(ring, upper_32_bits(addr));
6724 amdgpu_ring_write(ring, lower_32_bits(seq));
6726 if (flags & AMDGPU_FENCE_FLAG_INT) {
6727 /* set register to trigger INT */
6728 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6729 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6730 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6731 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6732 amdgpu_ring_write(ring, 0);
6733 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (used for CE/DE double-buffer switching). */
6737 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6739 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6740 amdgpu_ring_write(ring, 0);
/* Emit a CONTEXT_CONTROL packet whose dw2 load flags depend on @flags
 * (context switch, preamble present).  Under SR-IOV the CE metadata is
 * emitted before and the DE metadata after the packet, targeting either
 * the VF CSA or the VMID0 CSA depending on AMDGPU_VM_DOMAIN.
 */
6743 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6747 if (amdgpu_sriov_vf(ring->adev))
6748 gfx_v8_0_ring_emit_ce_meta_init(ring,
6749 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6751 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6752 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* flush the VGT before switching context */
6753 gfx_v8_0_ring_emit_vgt_flush(ring);
6754 /* set load_global_config & load_global_uconfig */
6756 /* set load_cs_sh_regs */
6758 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6761 /* set load_ce_ram if preamble presented */
6762 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6765 /* still load_ce_ram if this is the first time preamble presented
6766 * although there is no context switch happens.
6768 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6772 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6773 amdgpu_ring_write(ring, dw2);
6774 amdgpu_ring_write(ring, 0);
6776 if (amdgpu_sriov_vf(ring->adev))
6777 gfx_v8_0_ring_emit_de_meta_init(ring,
6778 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
/* Emit a register read on the ring: COPY_DATA moves the register value
 * into the writeback buffer at virt.reg_val_offs, where the driver can
 * read it back after the packet executes (used by the KIQ under SR-IOV).
 */
6781 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6783 struct amdgpu_device *adev = ring->adev;
6785 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6786 amdgpu_ring_write(ring, 0 | /* src: register*/
6787 (5 << 8) | /* dst: memory */
6788 (1 << 20)); /* write confirm */
6789 amdgpu_ring_write(ring, reg);
6790 amdgpu_ring_write(ring, 0);
6791 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6792 adev->virt.reg_val_offs * 4));
6793 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6794 adev->virt.reg_val_offs * 4));
/* Emit a register write on the ring: WRITE_DATA of @val to @reg. */
6797 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6800 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6801 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6802 amdgpu_ring_write(ring, reg);
6803 amdgpu_ring_write(ring, 0);
6804 amdgpu_ring_write(ring, val);
/* Enable/disable the GFX ring EOP timestamp interrupt in CP_INT_CNTL_RING0. */
6807 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6808 enum amdgpu_interrupt_state state)
6810 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6811 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the compute EOP timestamp interrupt for one MEC pipe.
 * Only ME1 pipe 0 is handled here (see comment below); invalid me/pipe
 * combinations are rejected with a debug message.
 */
6814 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6816 enum amdgpu_interrupt_state state)
6819 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6820 * handles the setting of interrupts for this specific pipe. All other
6821 * pipes' interrupts are set by amdkfd.
6829 DRM_DEBUG("invalid pipe %d\n", pipe);
6833 DRM_DEBUG("invalid me %d\n", me);
6837 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6838 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* irq_src .set callback: toggle the privileged-register-access fault
 * interrupt enable in CP_INT_CNTL_RING0.
 */
6841 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6842 struct amdgpu_irq_src *source,
6844 enum amdgpu_interrupt_state state)
6846 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6847 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* irq_src .set callback: toggle the privileged-instruction fault
 * interrupt enable in CP_INT_CNTL_RING0.
 */
6852 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6853 struct amdgpu_irq_src *source,
6855 enum amdgpu_interrupt_state state)
6857 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6858 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* irq_src .set callback: dispatch EOP interrupt enable/disable to the GFX
 * ring or to the matching MEC (me 1/2) pipe (0-3) helper.
 */
6863 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6864 struct amdgpu_irq_src *src,
6866 enum amdgpu_interrupt_state state)
6869 case AMDGPU_CP_IRQ_GFX_EOP:
6870 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6872 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6873 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6875 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6876 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6878 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6879 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6881 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6882 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6884 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6885 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6887 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6888 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6890 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6891 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6893 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6894 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/* irq_src .process callback for CP EOP interrupts: decode me/pipe/queue
 * from the IV ring_id and run fence processing on the matching ring
 * (GFX ring for me 0, otherwise the matching compute ring).
 */
6902 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6903 struct amdgpu_irq_src *source,
6904 struct amdgpu_iv_entry *entry)
6907 u8 me_id, pipe_id, queue_id;
6908 struct amdgpu_ring *ring;
6910 DRM_DEBUG("IH: CP EOP\n");
/* ring_id layout: bits 3:2 = me, bits 1:0 = pipe, bits 6:4 = queue */
6911 me_id = (entry->ring_id & 0x0c) >> 2;
6912 pipe_id = (entry->ring_id & 0x03) >> 0;
6913 queue_id = (entry->ring_id & 0x70) >> 4;
6917 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6921 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6922 ring = &adev->gfx.compute_ring[i];
6923 /* Per-queue interrupt is supported for MEC starting from VI.
6924 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6926 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6927 amdgpu_fence_process(ring);
/* irq_src .process callback for privileged register faults: log the
 * violation and schedule a GPU reset.
 */
6934 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6935 struct amdgpu_irq_src *source,
6936 struct amdgpu_iv_entry *entry)
6938 DRM_ERROR("Illegal register access in command stream\n");
6939 schedule_work(&adev->reset_work);
/* irq_src .process callback for privileged instruction faults: log the
 * violation and schedule a GPU reset.
 */
6943 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6944 struct amdgpu_irq_src *source,
6945 struct amdgpu_iv_entry *entry)
6947 DRM_ERROR("Illegal instruction in command stream\n");
6948 schedule_work(&adev->reset_work);
/* irq_src .set callback for the KIQ: enable/disable GENERIC2_INT both
 * globally in CPC_INT_CNTL and in the KIQ ring's per-pipe ME{1,2}_PIPEn
 * interrupt control register.  Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is
 * supported; anything else BUG()s.
 */
6952 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6953 struct amdgpu_irq_src *src,
6955 enum amdgpu_interrupt_state state)
6957 uint32_t tmp, target;
6958 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6960 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
/* pick the per-pipe INT_CNTL register bank for the KIQ's ME, then
 * offset by pipe (ME1/ME2 pipe registers are contiguous) */
6963 target = mmCP_ME1_PIPE0_INT_CNTL;
6965 target = mmCP_ME2_PIPE0_INT_CNTL;
6966 target += ring->pipe;
6969 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6970 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6971 tmp = RREG32(mmCPC_INT_CNTL);
6972 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6973 GENERIC2_INT_ENABLE, 0);
6974 WREG32(mmCPC_INT_CNTL, tmp);
6976 tmp = RREG32(target);
/* CP_ME2_PIPE0_INT_CNTL field layout is shared by all ME/pipe banks */
6977 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6978 GENERIC2_INT_ENABLE, 0);
6979 WREG32(target, tmp);
6981 tmp = RREG32(mmCPC_INT_CNTL);
6982 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6983 GENERIC2_INT_ENABLE, 1);
6984 WREG32(mmCPC_INT_CNTL, tmp);
6986 tmp = RREG32(target);
6987 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6988 GENERIC2_INT_ENABLE, 1);
6989 WREG32(target, tmp);
6993 BUG(); /* kiq only support GENERIC2_INT now */
/* irq_src .process callback for KIQ GENERIC2 interrupts: decode the
 * source me/pipe/queue for debugging and process fences on the KIQ ring.
 */
6999 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7000 struct amdgpu_irq_src *source,
7001 struct amdgpu_iv_entry *entry)
7003 u8 me_id, pipe_id, queue_id;
7004 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7006 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
/* ring_id layout: bits 3:2 = me, bits 1:0 = pipe, bits 6:4 = queue */
7008 me_id = (entry->ring_id & 0x0c) >> 2;
7009 pipe_id = (entry->ring_id & 0x03) >> 0;
7010 queue_id = (entry->ring_id & 0x70) >> 4;
7011 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7012 me_id, pipe_id, queue_id);
7014 amdgpu_fence_process(ring);
/* amd_ip_funcs vtable wiring the GFX v8 IP block into the common
 * amdgpu IP lifecycle (init/fini, suspend/resume, reset, clock/power gating).
 */
7018 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7020 .early_init = gfx_v8_0_early_init,
7021 .late_init = gfx_v8_0_late_init,
7022 .sw_init = gfx_v8_0_sw_init,
7023 .sw_fini = gfx_v8_0_sw_fini,
7024 .hw_init = gfx_v8_0_hw_init,
7025 .hw_fini = gfx_v8_0_hw_fini,
7026 .suspend = gfx_v8_0_suspend,
7027 .resume = gfx_v8_0_resume,
7028 .is_idle = gfx_v8_0_is_idle,
7029 .wait_for_idle = gfx_v8_0_wait_for_idle,
7030 .check_soft_reset = gfx_v8_0_check_soft_reset,
7031 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7032 .soft_reset = gfx_v8_0_soft_reset,
7033 .post_soft_reset = gfx_v8_0_post_soft_reset,
7034 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7035 .set_powergating_state = gfx_v8_0_set_powergating_state,
7036 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
/* amdgpu_ring_funcs vtable for GFX rings; the emit-frame size budget is
 * itemized per emitted packet sequence in the comments below.
 */
7039 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7040 .type = AMDGPU_RING_TYPE_GFX,
7042 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7043 .support_64bit_ptrs = false,
7044 .get_rptr = gfx_v8_0_ring_get_rptr,
7045 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7046 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7048 20 + /* gfx_v8_0_ring_emit_gds_switch */
7049 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7050 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7051 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7052 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7053 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
7054 2 + /* gfx_v8_ring_emit_sb */
7055 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
7056 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7057 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7058 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7059 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7060 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7061 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7062 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7063 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7064 .test_ring = gfx_v8_0_ring_test_ring,
7065 .test_ib = gfx_v8_0_ring_test_ib,
7066 .insert_nop = amdgpu_ring_insert_nop,
7067 .pad_ib = amdgpu_ring_generic_pad_ib,
7068 .emit_switch_buffer = gfx_v8_ring_emit_sb,
7069 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
/* amdgpu_ring_funcs vtable for compute (MEC) rings; uses the RELEASE_MEM
 * based fence and the shared wptr-via-doorbell helpers.
 */
7072 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7073 .type = AMDGPU_RING_TYPE_COMPUTE,
7075 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7076 .support_64bit_ptrs = false,
7077 .get_rptr = gfx_v8_0_ring_get_rptr,
7078 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7079 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7081 20 + /* gfx_v8_0_ring_emit_gds_switch */
7082 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7083 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7084 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7085 17 + /* gfx_v8_0_ring_emit_vm_flush */
7086 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7087 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7088 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7089 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7090 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7091 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7092 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7093 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7094 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7095 .test_ring = gfx_v8_0_ring_test_ring,
7096 .test_ib = gfx_v8_0_ring_test_ib,
7097 .insert_nop = amdgpu_ring_insert_nop,
7098 .pad_ib = amdgpu_ring_generic_pad_ib,
/* amdgpu_ring_funcs vtable for the KIQ (kernel interface queue); adds the
 * rreg/wreg emit hooks used for register access under SR-IOV and uses the
 * 32-bit WRITE_DATA based fence.
 */
7101 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7102 .type = AMDGPU_RING_TYPE_KIQ,
7104 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7105 .support_64bit_ptrs = false,
7106 .get_rptr = gfx_v8_0_ring_get_rptr,
7107 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7108 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7110 20 + /* gfx_v8_0_ring_emit_gds_switch */
7111 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7112 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7113 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7114 17 + /* gfx_v8_0_ring_emit_vm_flush */
7115 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7116 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7117 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7118 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7119 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7120 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7121 .test_ring = gfx_v8_0_ring_test_ring,
7122 .test_ib = gfx_v8_0_ring_test_ib,
7123 .insert_nop = amdgpu_ring_insert_nop,
7124 .pad_ib = amdgpu_ring_generic_pad_ib,
7125 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7126 .emit_wreg = gfx_v8_0_ring_emit_wreg,
/* Attach the proper ring_funcs vtable to the KIQ, GFX and compute rings. */
7129 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7133 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7135 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7136 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7138 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7139 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* IRQ source ops for CP EOP interrupts. */
7142 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7143 .set = gfx_v8_0_set_eop_interrupt_state,
7144 .process = gfx_v8_0_eop_irq,
/* IRQ source ops for privileged register fault interrupts. */
7147 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7148 .set = gfx_v8_0_set_priv_reg_fault_state,
7149 .process = gfx_v8_0_priv_reg_irq,
/* IRQ source ops for privileged instruction fault interrupts. */
7152 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7153 .set = gfx_v8_0_set_priv_inst_fault_state,
7154 .process = gfx_v8_0_priv_inst_irq,
/* IRQ source ops for KIQ GENERIC2 interrupts. */
7157 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7158 .set = gfx_v8_0_kiq_set_interrupt_state,
7159 .process = gfx_v8_0_kiq_irq,
/* Register the GFX block's interrupt sources (EOP, priv reg/inst faults,
 * KIQ) with their type counts and ops tables.
 */
7162 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7164 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7165 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7167 adev->gfx.priv_reg_irq.num_types = 1;
7168 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7170 adev->gfx.priv_inst_irq.num_types = 1;
7171 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7173 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7174 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
/* Attach the RLC ops table (iceland variant is used for all of GFX v8). */
7177 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7179 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
/* Initialize GDS (global data share) sizing: read total GDS memory from
 * the VMID0 size register, fix GWS/OA totals, and pick gfx/cs partition
 * sizes depending on whether the part has 64KB of GDS.
 */
7182 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7184 /* init asci gds info */
7185 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7186 adev->gds.gws.total_size = 64;
7187 adev->gds.oa.total_size = 16;
7189 if (adev->gds.mem.total_size == 64 * 1024) {
7190 adev->gds.mem.gfx_partition_size = 4096;
7191 adev->gds.mem.cs_partition_size = 4096;
7193 adev->gds.gws.gfx_partition_size = 4;
7194 adev->gds.gws.cs_partition_size = 4;
7196 adev->gds.oa.gfx_partition_size = 4;
7197 adev->gds.oa.cs_partition_size = 1;
7199 adev->gds.mem.gfx_partition_size = 1024;
7200 adev->gds.mem.cs_partition_size = 1024;
7202 adev->gds.gws.gfx_partition_size = 16;
7203 adev->gds.gws.cs_partition_size = 16;
7205 adev->gds.oa.gfx_partition_size = 4;
7206 adev->gds.oa.cs_partition_size = 4;
/* Program the user-specified inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 */
7210 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7218 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7219 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7221 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
/* Return the active-CU bitmap for the currently selected SE/SH: invert
 * the combined hardware + user inactive-CU masks and clip to max_cu_per_sh.
 */
7224 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7228 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7229 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7231 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7233 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
/* Populate adev->gfx.cu_info: walk every SE/SH under grbm_idx_mutex,
 * apply the module-parameter CU disable masks, record the per-SH active
 * CU bitmap, and accumulate the total active CU count and always-on mask.
 */
7236 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7238 int i, j, k, counter, active_cu_number = 0;
7239 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7240 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7241 unsigned disable_masks[4 * 2];
7243 memset(cu_info, 0, sizeof(*cu_info));
/* parse the amdgpu.disable_cu module option for up to 4 SEs x 2 SHs */
7245 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
/* GRBM SE/SH selection is global state, so serialize access */
7247 mutex_lock(&adev->grbm_idx_mutex);
7248 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7249 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7253 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7255 gfx_v8_0_set_user_cu_inactive_bitmap(
7256 adev, disable_masks[i * 2 + j]);
7257 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7258 cu_info->bitmap[i][j] = bitmap;
7260 for (k = 0; k < 16; k ++) {
7261 if (bitmap & mask) {
7268 active_cu_number += counter;
7269 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
/* restore broadcast selection before releasing the lock */
7272 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7273 mutex_unlock(&adev->grbm_idx_mutex);
7275 cu_info->number = active_cu_number;
7276 cu_info->ao_cu_mask = ao_cu_mask;
/* GFX v8.0 IP block descriptor exported to the SoC setup code. */
7279 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7281 .type = AMD_IP_BLOCK_TYPE_GFX,
7285 .funcs = &gfx_v8_0_ip_funcs,
/* GFX v8.1 IP block descriptor; shares the v8.0 callback table. */
7288 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7290 .type = AMD_IP_BLOCK_TYPE_GFX,
7294 .funcs = &gfx_v8_0_ip_funcs,
/* Emit the CE metadata init for SR-IOV world switch: WRITE_DATA the CE
 * payload into the CSA at @csa_addr.  The payload layout (and therefore
 * the packet dword count) differs when chained IBs are supported.
 */
7297 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7299 uint64_t ce_payload_addr;
7302 struct vi_ce_ib_state regular;
7303 struct vi_ce_ib_state_chained_ib chained;
7306 if (ring->adev->virt.chained_ib_support) {
7307 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
/* count = payload dwords + 4 header dwords - 2 (PACKET3 count bias) */
7308 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7310 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
7311 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7314 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7315 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7316 WRITE_DATA_DST_SEL(8) |
7318 WRITE_DATA_CACHE_POLICY(0));
7319 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7320 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7321 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
/* Emit the DE metadata init for SR-IOV world switch: fill in the GDS
 * backup address (CSA + 4K) in the payload, then WRITE_DATA it into the
 * CSA.  Like the CE variant, layout depends on chained-IB support.
 */
7324 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7326 uint64_t de_payload_addr, gds_addr;
7329 struct vi_de_ib_state regular;
7330 struct vi_de_ib_state_chained_ib chained;
/* GDS backup area lives one page past the CSA base */
7333 gds_addr = csa_addr + 4096;
7334 if (ring->adev->virt.chained_ib_support) {
7335 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7336 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7337 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
/* count = payload dwords + 4 header dwords - 2 (PACKET3 count bias) */
7338 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7340 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7341 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7342 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7343 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7346 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7347 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7348 WRITE_DATA_DST_SEL(8) |
7350 WRITE_DATA_CACHE_POLICY(0));
7351 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7352 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7353 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7356 /* create MQD for each compute queue */
/* Allocate one page-sized GTT BO holding the vi_mqd for the KIQ ring and
 * for every compute ring, plus a host-side kmalloc'd backup copy per MQD
 * (the KIQ backup lives at index AMDGPU_MAX_COMPUTE_RINGS).  Allocation
 * is skipped when a ring already has its mqd_obj.  A missing backup is
 * only warned about, not treated as fatal.
 */
7357 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7359 struct amdgpu_ring *ring = NULL;
7362 /* create MQD for KIQ */
7363 ring = &adev->gfx.kiq.ring;
7364 if (!ring->mqd_obj) {
7365 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7366 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7367 &ring->mqd_gpu_addr, &ring->mqd_ptr);
7369 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7373 /* prepare MQD backup */
7374 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7375 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7376 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7379 /* create MQD for each KCQ */
7380 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7381 ring = &adev->gfx.compute_ring[i];
7382 if (!ring->mqd_obj) {
7383 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7384 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7385 &ring->mqd_gpu_addr, &ring->mqd_ptr);
7387 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7391 /* prepare MQD backup */
7392 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7393 if (!adev->gfx.mec.mqd_backup[i])
7394 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7401 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7403 struct amdgpu_ring *ring = NULL;
7406 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7407 ring = &adev->gfx.compute_ring[i];
7408 kfree(adev->gfx.mec.mqd_backup[i]);
7409 amdgpu_bo_free_kernel(&ring->mqd_obj,
7410 &ring->mqd_gpu_addr,
7414 ring = &adev->gfx.kiq.ring;
7415 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7416 amdgpu_bo_free_kernel(&ring->mqd_obj,
7417 &ring->mqd_gpu_addr,