1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/iommu.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/msi.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_platform.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36
37 #include "io-pgtable.h"
38
39 /* MMIO registers */
40 #define ARM_SMMU_IDR0                   0x0
41 #define IDR0_ST_LVL_SHIFT               27
42 #define IDR0_ST_LVL_MASK                0x3
43 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
44 #define IDR0_STALL_MODEL_SHIFT          24
45 #define IDR0_STALL_MODEL_MASK           0x3
46 #define IDR0_STALL_MODEL_STALL          (0 << IDR0_STALL_MODEL_SHIFT)
47 #define IDR0_STALL_MODEL_FORCE          (2 << IDR0_STALL_MODEL_SHIFT)
48 #define IDR0_TTENDIAN_SHIFT             21
49 #define IDR0_TTENDIAN_MASK              0x3
50 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
51 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
52 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
53 #define IDR0_CD2L                       (1 << 19)
54 #define IDR0_VMID16                     (1 << 18)
55 #define IDR0_PRI                        (1 << 16)
56 #define IDR0_SEV                        (1 << 14)
57 #define IDR0_MSI                        (1 << 13)
58 #define IDR0_ASID16                     (1 << 12)
59 #define IDR0_ATS                        (1 << 10)
60 #define IDR0_HYP                        (1 << 9)
61 #define IDR0_COHACC                     (1 << 4)
62 #define IDR0_TTF_SHIFT                  2
63 #define IDR0_TTF_MASK                   0x3
64 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
65 #define IDR0_TTF_AARCH32_64             (3 << IDR0_TTF_SHIFT)
66 #define IDR0_S1P                        (1 << 1)
67 #define IDR0_S2P                        (1 << 0)
68
69 #define ARM_SMMU_IDR1                   0x4
70 #define IDR1_TABLES_PRESET              (1 << 30)
71 #define IDR1_QUEUES_PRESET              (1 << 29)
72 #define IDR1_REL                        (1 << 28)
73 #define IDR1_CMDQ_SHIFT                 21
74 #define IDR1_CMDQ_MASK                  0x1f
75 #define IDR1_EVTQ_SHIFT                 16
76 #define IDR1_EVTQ_MASK                  0x1f
77 #define IDR1_PRIQ_SHIFT                 11
78 #define IDR1_PRIQ_MASK                  0x1f
79 #define IDR1_SSID_SHIFT                 6
80 #define IDR1_SSID_MASK                  0x1f
81 #define IDR1_SID_SHIFT                  0
82 #define IDR1_SID_MASK                   0x3f
83
84 #define ARM_SMMU_IDR5                   0x14
85 #define IDR5_STALL_MAX_SHIFT            16
86 #define IDR5_STALL_MAX_MASK             0xffff
87 #define IDR5_GRAN64K                    (1 << 6)
88 #define IDR5_GRAN16K                    (1 << 5)
89 #define IDR5_GRAN4K                     (1 << 4)
90 #define IDR5_OAS_SHIFT                  0
91 #define IDR5_OAS_MASK                   0x7
92 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
94 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
95 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
96 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
97 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
98
99 #define ARM_SMMU_CR0                    0x20
100 #define CR0_CMDQEN                      (1 << 3)
101 #define CR0_EVTQEN                      (1 << 2)
102 #define CR0_PRIQEN                      (1 << 1)
103 #define CR0_SMMUEN                      (1 << 0)
104
105 #define ARM_SMMU_CR0ACK                 0x24
106
107 #define ARM_SMMU_CR1                    0x28
108 #define CR1_SH_NSH                      0
109 #define CR1_SH_OSH                      2
110 #define CR1_SH_ISH                      3
111 #define CR1_CACHE_NC                    0
112 #define CR1_CACHE_WB                    1
113 #define CR1_CACHE_WT                    2
114 #define CR1_TABLE_SH_SHIFT              10
115 #define CR1_TABLE_OC_SHIFT              8
116 #define CR1_TABLE_IC_SHIFT              6
117 #define CR1_QUEUE_SH_SHIFT              4
118 #define CR1_QUEUE_OC_SHIFT              2
119 #define CR1_QUEUE_IC_SHIFT              0
120
121 #define ARM_SMMU_CR2                    0x2c
122 #define CR2_PTM                         (1 << 2)
123 #define CR2_RECINVSID                   (1 << 1)
124 #define CR2_E2H                         (1 << 0)
125
126 #define ARM_SMMU_IRQ_CTRL               0x50
127 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
128 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
129 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
130
131 #define ARM_SMMU_IRQ_CTRLACK            0x54
132
133 #define ARM_SMMU_GERROR                 0x60
134 #define GERROR_SFM_ERR                  (1 << 8)
135 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
136 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
137 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
138 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
139 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
140 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
141 #define GERROR_CMDQ_ERR                 (1 << 0)
142 #define GERROR_ERR_MASK                 0xfd
143
144 #define ARM_SMMU_GERRORN                0x64
145
146 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
147 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
148 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
149
150 #define ARM_SMMU_STRTAB_BASE            0x80
151 #define STRTAB_BASE_RA                  (1UL << 62)
152 #define STRTAB_BASE_ADDR_SHIFT          6
153 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
154
155 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
156 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
157 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
158 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
159 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
160 #define STRTAB_BASE_CFG_FMT_SHIFT       16
161 #define STRTAB_BASE_CFG_FMT_MASK        0x3
162 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
163 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
164
165 #define ARM_SMMU_CMDQ_BASE              0x90
166 #define ARM_SMMU_CMDQ_PROD              0x98
167 #define ARM_SMMU_CMDQ_CONS              0x9c
168
169 #define ARM_SMMU_EVTQ_BASE              0xa0
170 #define ARM_SMMU_EVTQ_PROD              0x100a8
171 #define ARM_SMMU_EVTQ_CONS              0x100ac
172 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
173 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
174 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
175
176 #define ARM_SMMU_PRIQ_BASE              0xc0
177 #define ARM_SMMU_PRIQ_PROD              0x100c8
178 #define ARM_SMMU_PRIQ_CONS              0x100cc
179 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
180 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
181 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
182
183 /* Common MSI config fields */
184 #define MSI_CFG0_ADDR_SHIFT             2
185 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
186 #define MSI_CFG2_SH_SHIFT               4
187 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
188 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
189 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
190 #define MSI_CFG2_MEMATTR_SHIFT          0
191 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
192
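/*
 * Queue producer/consumer pointers: the low max_n_shift bits are the index
 * into the queue, the next bit up is the wrap flag (it toggles on every pass
 * over the queue) and bit 31 is the overflow flag reported for the event and
 * PRI queues.
 */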
193 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG                 (1 << 31)
196 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
197 #define Q_ENT(q, p)                     ((q)->base +                    \
198                                          Q_IDX(q, p) * (q)->ent_dwords)
199
200 #define Q_BASE_RWA                      (1UL << 62)
201 #define Q_BASE_ADDR_SHIFT               5
202 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
203 #define Q_BASE_LOG2SIZE_SHIFT           0
204 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
205
206 /*
207  * Stream table.
208  *
209  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
210  * 2lvl: 128k L1 entries,
211  *       256 lazy entries per table (each table covers a PCI bus)
212  */
213 #define STRTAB_L1_SZ_SHIFT              20
214 #define STRTAB_SPLIT                    8
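/*
 * With a split of 8, SID[7:0] indexes the level-2 table and the upper SID
 * bits select the level-1 descriptor.
 */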
215
216 #define STRTAB_L1_DESC_DWORDS           1
217 #define STRTAB_L1_DESC_SPAN_SHIFT       0
218 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
219 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
220 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
221
222 #define STRTAB_STE_DWORDS               8
223 #define STRTAB_STE_0_V                  (1UL << 0)
224 #define STRTAB_STE_0_CFG_SHIFT          1
225 #define STRTAB_STE_0_CFG_MASK           0x7UL
226 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
227 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
228 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
229 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
230
231 #define STRTAB_STE_0_S1FMT_SHIFT        4
232 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
233 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
234 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
235 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
236 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
237
238 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
239 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
240 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
241 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
242 #define STRTAB_STE_1_S1C_SH_NSH         0UL
243 #define STRTAB_STE_1_S1C_SH_OSH         2UL
244 #define STRTAB_STE_1_S1C_SH_ISH         3UL
245 #define STRTAB_STE_1_S1CIR_SHIFT        2
246 #define STRTAB_STE_1_S1COR_SHIFT        4
247 #define STRTAB_STE_1_S1CSH_SHIFT        6
248
249 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
250
251 #define STRTAB_STE_1_EATS_ABT           0UL
252 #define STRTAB_STE_1_EATS_TRANS         1UL
253 #define STRTAB_STE_1_EATS_S1CHK         2UL
254 #define STRTAB_STE_1_EATS_SHIFT         28
255
256 #define STRTAB_STE_1_STRW_NSEL1         0UL
257 #define STRTAB_STE_1_STRW_EL2           2UL
258 #define STRTAB_STE_1_STRW_SHIFT         30
259
260 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
261 #define STRTAB_STE_1_SHCFG_SHIFT        44
262
263 #define STRTAB_STE_2_S2VMID_SHIFT       0
264 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
265 #define STRTAB_STE_2_VTCR_SHIFT         32
266 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
267 #define STRTAB_STE_2_S2AA64             (1UL << 51)
268 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
269 #define STRTAB_STE_2_S2PTW              (1UL << 54)
270 #define STRTAB_STE_2_S2R                (1UL << 58)
271
272 #define STRTAB_STE_3_S2TTB_SHIFT        4
273 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
274
275 /* Context descriptor (stage-1 only) */
276 #define CTXDESC_CD_DWORDS               8
277 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
278 #define ARM64_TCR_T0SZ_SHIFT            0
279 #define ARM64_TCR_T0SZ_MASK             0x1fUL
280 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
281 #define ARM64_TCR_TG0_SHIFT             14
282 #define ARM64_TCR_TG0_MASK              0x3UL
283 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
284 #define ARM64_TCR_IRGN0_SHIFT           8
285 #define ARM64_TCR_IRGN0_MASK            0x3UL
286 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
287 #define ARM64_TCR_ORGN0_SHIFT           10
288 #define ARM64_TCR_ORGN0_MASK            0x3UL
289 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
290 #define ARM64_TCR_SH0_SHIFT             12
291 #define ARM64_TCR_SH0_MASK              0x3UL
292 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
293 #define ARM64_TCR_EPD0_SHIFT            7
294 #define ARM64_TCR_EPD0_MASK             0x1UL
295 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
296 #define ARM64_TCR_EPD1_SHIFT            23
297 #define ARM64_TCR_EPD1_MASK             0x1UL
298
299 #define CTXDESC_CD_0_ENDI               (1UL << 15)
300 #define CTXDESC_CD_0_V                  (1UL << 31)
301
302 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
303 #define ARM64_TCR_IPS_SHIFT             32
304 #define ARM64_TCR_IPS_MASK              0x7UL
305 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
306 #define ARM64_TCR_TBI0_SHIFT            37
307 #define ARM64_TCR_TBI0_MASK             0x1UL
308
309 #define CTXDESC_CD_0_AA64               (1UL << 41)
310 #define CTXDESC_CD_0_R                  (1UL << 45)
311 #define CTXDESC_CD_0_A                  (1UL << 46)
312 #define CTXDESC_CD_0_ASET_SHIFT         47
313 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
314 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
315 #define CTXDESC_CD_0_ASID_SHIFT         48
316 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
317
318 #define CTXDESC_CD_1_TTB0_SHIFT         4
319 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
320
321 #define CTXDESC_CD_3_MAIR_SHIFT         0
322
323 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
324 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
325         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
326          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
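/*
 * e.g. ARM_SMMU_TCR2CD(tcr, TG0) extracts TG0 from the CPU TCR layout
 * (bits [15:14]) and re-packs it at the CD.TCR offset (bits [7:6]).
 */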
327
328 /* Command queue */
329 #define CMDQ_ENT_DWORDS                 2
330 #define CMDQ_MAX_SZ_SHIFT               8
331
332 #define CMDQ_ERR_SHIFT                  24
333 #define CMDQ_ERR_MASK                   0x7f
334 #define CMDQ_ERR_CERROR_NONE_IDX        0
335 #define CMDQ_ERR_CERROR_ILL_IDX         1
336 #define CMDQ_ERR_CERROR_ABT_IDX         2
337
338 #define CMDQ_0_OP_SHIFT                 0
339 #define CMDQ_0_OP_MASK                  0xffUL
340 #define CMDQ_0_SSV                      (1UL << 11)
341
342 #define CMDQ_PREFETCH_0_SID_SHIFT       32
343 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
344 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
345
346 #define CMDQ_CFGI_0_SID_SHIFT           32
347 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
348 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
349 #define CMDQ_CFGI_1_RANGE_SHIFT         0
350 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
351
352 #define CMDQ_TLBI_0_VMID_SHIFT          32
353 #define CMDQ_TLBI_0_ASID_SHIFT          48
354 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
355 #define CMDQ_TLBI_1_VA_MASK             ~0xfffUL
356 #define CMDQ_TLBI_1_IPA_MASK            0xfffffffff000UL
357
358 #define CMDQ_PRI_0_SSID_SHIFT           12
359 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
360 #define CMDQ_PRI_0_SID_SHIFT            32
361 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
362 #define CMDQ_PRI_1_GRPID_SHIFT          0
363 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
364 #define CMDQ_PRI_1_RESP_SHIFT           12
365 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
366 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
367 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
368
369 #define CMDQ_SYNC_0_CS_SHIFT            12
370 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
371 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
372
373 /* Event queue */
374 #define EVTQ_ENT_DWORDS                 4
375 #define EVTQ_MAX_SZ_SHIFT               7
376
377 #define EVTQ_0_ID_SHIFT                 0
378 #define EVTQ_0_ID_MASK                  0xffUL
379
380 /* PRI queue */
381 #define PRIQ_ENT_DWORDS                 2
382 #define PRIQ_MAX_SZ_SHIFT               8
383
384 #define PRIQ_0_SID_SHIFT                0
385 #define PRIQ_0_SID_MASK                 0xffffffffUL
386 #define PRIQ_0_SSID_SHIFT               32
387 #define PRIQ_0_SSID_MASK                0xfffffUL
388 #define PRIQ_0_PERM_PRIV                (1UL << 58)
389 #define PRIQ_0_PERM_EXEC                (1UL << 59)
390 #define PRIQ_0_PERM_READ                (1UL << 60)
391 #define PRIQ_0_PERM_WRITE               (1UL << 61)
392 #define PRIQ_0_PRG_LAST                 (1UL << 62)
393 #define PRIQ_0_SSID_V                   (1UL << 63)
394
395 #define PRIQ_1_PRG_IDX_SHIFT            0
396 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
397 #define PRIQ_1_ADDR_SHIFT               12
398 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
399
400 /* High-level queue structures */
401 #define ARM_SMMU_POLL_TIMEOUT_US        100
402
403 static bool disable_bypass;
404 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
405 MODULE_PARM_DESC(disable_bypass,
406         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
407
408 enum pri_resp {
409         PRI_RESP_DENY,
410         PRI_RESP_FAIL,
411         PRI_RESP_SUCC,
412 };
413
414 enum arm_smmu_msi_index {
415         EVTQ_MSI_INDEX,
416         GERROR_MSI_INDEX,
417         PRIQ_MSI_INDEX,
418         ARM_SMMU_MAX_MSIS,
419 };
420
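/*
 * Register triplets for each MSI the SMMU can generate: doorbell address
 * (CFG0), payload (CFG1) and memory attributes/shareability (CFG2).
 */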
421 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
422         [EVTQ_MSI_INDEX] = {
423                 ARM_SMMU_EVTQ_IRQ_CFG0,
424                 ARM_SMMU_EVTQ_IRQ_CFG1,
425                 ARM_SMMU_EVTQ_IRQ_CFG2,
426         },
427         [GERROR_MSI_INDEX] = {
428                 ARM_SMMU_GERROR_IRQ_CFG0,
429                 ARM_SMMU_GERROR_IRQ_CFG1,
430                 ARM_SMMU_GERROR_IRQ_CFG2,
431         },
432         [PRIQ_MSI_INDEX] = {
433                 ARM_SMMU_PRIQ_IRQ_CFG0,
434                 ARM_SMMU_PRIQ_IRQ_CFG1,
435                 ARM_SMMU_PRIQ_IRQ_CFG2,
436         },
437 };
438
439 struct arm_smmu_cmdq_ent {
440         /* Common fields */
441         u8                              opcode;
442         bool                            substream_valid;
443
444         /* Command-specific fields */
445         union {
446                 #define CMDQ_OP_PREFETCH_CFG    0x1
447                 struct {
448                         u32                     sid;
449                         u8                      size;
450                         u64                     addr;
451                 } prefetch;
452
453                 #define CMDQ_OP_CFGI_STE        0x3
454                 #define CMDQ_OP_CFGI_ALL        0x4
455                 struct {
456                         u32                     sid;
457                         union {
458                                 bool            leaf;
459                                 u8              span;
460                         };
461                 } cfgi;
462
463                 #define CMDQ_OP_TLBI_NH_ASID    0x11
464                 #define CMDQ_OP_TLBI_NH_VA      0x12
465                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
466                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
467                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
468                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
469                 struct {
470                         u16                     asid;
471                         u16                     vmid;
472                         bool                    leaf;
473                         u64                     addr;
474                 } tlbi;
475
476                 #define CMDQ_OP_PRI_RESP        0x41
477                 struct {
478                         u32                     sid;
479                         u32                     ssid;
480                         u16                     grpid;
481                         enum pri_resp           resp;
482                 } pri;
483
484                 #define CMDQ_OP_CMD_SYNC        0x46
485         };
486 };
487
488 struct arm_smmu_queue {
489         int                             irq; /* Wired interrupt */
490
491         __le64                          *base;
492         dma_addr_t                      base_dma;
493         u64                             q_base;
494
495         size_t                          ent_dwords;
496         u32                             max_n_shift;
497         u32                             prod;
498         u32                             cons;
499
500         u32 __iomem                     *prod_reg;
501         u32 __iomem                     *cons_reg;
502 };
503
504 struct arm_smmu_cmdq {
505         struct arm_smmu_queue           q;
506         spinlock_t                      lock;
507 };
508
509 struct arm_smmu_evtq {
510         struct arm_smmu_queue           q;
511         u32                             max_stalls;
512 };
513
514 struct arm_smmu_priq {
515         struct arm_smmu_queue           q;
516 };
517
518 /* High-level stream table and context descriptor structures */
519 struct arm_smmu_strtab_l1_desc {
520         u8                              span;
521
522         __le64                          *l2ptr;
523         dma_addr_t                      l2ptr_dma;
524 };
525
526 struct arm_smmu_s1_cfg {
527         __le64                          *cdptr;
528         dma_addr_t                      cdptr_dma;
529
530         struct arm_smmu_ctx_desc {
531                 u16     asid;
532                 u64     ttbr;
533                 u64     tcr;
534                 u64     mair;
535         }                               cd;
536 };
537
538 struct arm_smmu_s2_cfg {
539         u16                             vmid;
540         u64                             vttbr;
541         u64                             vtcr;
542 };
543
544 struct arm_smmu_strtab_ent {
545         bool                            valid;
546
547         bool                            bypass; /* Overrides s1/s2 config */
548         struct arm_smmu_s1_cfg          *s1_cfg;
549         struct arm_smmu_s2_cfg          *s2_cfg;
550 };
551
552 struct arm_smmu_strtab_cfg {
553         __le64                          *strtab;
554         dma_addr_t                      strtab_dma;
555         struct arm_smmu_strtab_l1_desc  *l1_desc;
556         unsigned int                    num_l1_ents;
557
558         u64                             strtab_base;
559         u32                             strtab_base_cfg;
560 };
561
562 /* An SMMUv3 instance */
563 struct arm_smmu_device {
564         struct device                   *dev;
565         void __iomem                    *base;
566
567 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
568 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
569 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
570 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
571 #define ARM_SMMU_FEAT_PRI               (1 << 4)
572 #define ARM_SMMU_FEAT_ATS               (1 << 5)
573 #define ARM_SMMU_FEAT_SEV               (1 << 6)
574 #define ARM_SMMU_FEAT_MSI               (1 << 7)
575 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
576 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
577 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
578 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
579 #define ARM_SMMU_FEAT_HYP               (1 << 12)
580         u32                             features;
581
582 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
583         u32                             options;
584
585         struct arm_smmu_cmdq            cmdq;
586         struct arm_smmu_evtq            evtq;
587         struct arm_smmu_priq            priq;
588
589         int                             gerr_irq;
590
591         unsigned long                   ias; /* IPA */
592         unsigned long                   oas; /* PA */
593
594 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
595         unsigned int                    asid_bits;
596         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
597
598 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
599         unsigned int                    vmid_bits;
600         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
601
602         unsigned int                    ssid_bits;
603         unsigned int                    sid_bits;
604
605         struct arm_smmu_strtab_cfg      strtab_cfg;
606 };
607
608 /* SMMU private data for an IOMMU group */
609 struct arm_smmu_group {
610         struct arm_smmu_device          *smmu;
611         struct arm_smmu_domain          *domain;
612         int                             num_sids;
613         u32                             *sids;
614         struct arm_smmu_strtab_ent      ste;
615 };
616
617 /* SMMU private data for an IOMMU domain */
618 enum arm_smmu_domain_stage {
619         ARM_SMMU_DOMAIN_S1 = 0,
620         ARM_SMMU_DOMAIN_S2,
621         ARM_SMMU_DOMAIN_NESTED,
622 };
623
624 struct arm_smmu_domain {
625         struct arm_smmu_device          *smmu;
626         struct mutex                    init_mutex; /* Protects smmu pointer */
627
628         struct io_pgtable_ops           *pgtbl_ops;
629         spinlock_t                      pgtbl_lock;
630
631         enum arm_smmu_domain_stage      stage;
632         union {
633                 struct arm_smmu_s1_cfg  s1_cfg;
634                 struct arm_smmu_s2_cfg  s2_cfg;
635         };
636
637         struct iommu_domain             domain;
638 };
639
640 struct arm_smmu_option_prop {
641         u32 opt;
642         const char *prop;
643 };
644
645 static struct arm_smmu_option_prop arm_smmu_options[] = {
646         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
647         { 0, NULL},
648 };
649
650 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
651 {
652         return container_of(dom, struct arm_smmu_domain, domain);
653 }
654
655 static void parse_driver_options(struct arm_smmu_device *smmu)
656 {
657         int i = 0;
658
659         do {
660                 if (of_property_read_bool(smmu->dev->of_node,
661                                                 arm_smmu_options[i].prop)) {
662                         smmu->options |= arm_smmu_options[i].opt;
663                         dev_notice(smmu->dev, "option %s\n",
664                                 arm_smmu_options[i].prop);
665                 }
666         } while (arm_smmu_options[++i].opt);
667 }
668
669 /* Low-level queue manipulation functions */
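/*
 * A queue is full when the producer and consumer share the same index but
 * disagree on the wrap bit, and empty when index and wrap bit both match.
 */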
670 static bool queue_full(struct arm_smmu_queue *q)
671 {
672         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
674 }
675
676 static bool queue_empty(struct arm_smmu_queue *q)
677 {
678         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
679                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
680 }
681
682 static void queue_sync_cons(struct arm_smmu_queue *q)
683 {
684         q->cons = readl_relaxed(q->cons_reg);
685 }
686
687 static void queue_inc_cons(struct arm_smmu_queue *q)
688 {
689         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
690
691         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
692         writel(q->cons, q->cons_reg);
693 }
694
695 static int queue_sync_prod(struct arm_smmu_queue *q)
696 {
697         int ret = 0;
698         u32 prod = readl_relaxed(q->prod_reg);
699
700         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
701                 ret = -EOVERFLOW;
702
703         q->prod = prod;
704         return ret;
705 }
706
707 static void queue_inc_prod(struct arm_smmu_queue *q)
708 {
709         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
710
711         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
712         writel(q->prod, q->prod_reg);
713 }
714
715 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
716 {
717         if (Q_WRP(q, q->cons) == Q_WRP(q, until))
718                 return Q_IDX(q, q->cons) < Q_IDX(q, until);
719
720         return Q_IDX(q, q->cons) >= Q_IDX(q, until);
721 }
722
723 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
724 {
725         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
726
727         while (queue_sync_cons(q), __queue_cons_before(q, until)) {
728                 if (ktime_compare(ktime_get(), timeout) > 0)
729                         return -ETIMEDOUT;
730
731                 if (wfe) {
732                         wfe();
733                 } else {
734                         cpu_relax();
735                         udelay(1);
736                 }
737         }
738
739         return 0;
740 }
741
742 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
743 {
744         int i;
745
746         for (i = 0; i < n_dwords; ++i)
747                 *dst++ = cpu_to_le64(*src++);
748 }
749
750 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
751 {
752         if (queue_full(q))
753                 return -ENOSPC;
754
755         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
756         queue_inc_prod(q);
757         return 0;
758 }
759
760 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
761 {
762         int i;
763
764         for (i = 0; i < n_dwords; ++i)
765                 *dst++ = le64_to_cpu(*src++);
766 }
767
768 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
769 {
770         if (queue_empty(q))
771                 return -EAGAIN;
772
773         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
774         queue_inc_cons(q);
775         return 0;
776 }
777
778 /* High-level queue accessors */
779 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
780 {
781         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
782         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
783
784         switch (ent->opcode) {
785         case CMDQ_OP_TLBI_EL2_ALL:
786         case CMDQ_OP_TLBI_NSNH_ALL:
787                 break;
788         case CMDQ_OP_PREFETCH_CFG:
789                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
790                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
791                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
792                 break;
793         case CMDQ_OP_CFGI_STE:
794                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
795                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
796                 break;
797         case CMDQ_OP_CFGI_ALL:
798                 /* Cover the entire SID range */
799                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
800                 break;
801         case CMDQ_OP_TLBI_NH_VA:
802                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
803                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
804                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
805                 break;
806         case CMDQ_OP_TLBI_S2_IPA:
807                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
808                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
809                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
810                 break;
811         case CMDQ_OP_TLBI_NH_ASID:
812                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
813                 /* Fallthrough */
814         case CMDQ_OP_TLBI_S12_VMALL:
815                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
816                 break;
817         case CMDQ_OP_PRI_RESP:
818                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
819                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
820                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
821                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
822                 switch (ent->pri.resp) {
823                 case PRI_RESP_DENY:
824                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
825                         break;
826                 case PRI_RESP_FAIL:
827                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
828                         break;
829                 case PRI_RESP_SUCC:
830                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
831                         break;
832                 default:
833                         return -EINVAL;
834                 }
835                 break;
836         case CMDQ_OP_CMD_SYNC:
837                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
838                 break;
839         default:
840                 return -ENOENT;
841         }
842
843         return 0;
844 }
845
846 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
847 {
848         static const char *cerror_str[] = {
849                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
850                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
851                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
852         };
853
854         int i;
855         u64 cmd[CMDQ_ENT_DWORDS];
856         struct arm_smmu_queue *q = &smmu->cmdq.q;
857         u32 cons = readl_relaxed(q->cons_reg);
858         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
859         struct arm_smmu_cmdq_ent cmd_sync = {
860                 .opcode = CMDQ_OP_CMD_SYNC,
861         };
862
863         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
864                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
865
866         switch (idx) {
867         case CMDQ_ERR_CERROR_ABT_IDX:
868                 dev_err(smmu->dev, "retrying command fetch\n");
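                /* Fallthrough */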
869         case CMDQ_ERR_CERROR_NONE_IDX:
870                 return;
871         case CMDQ_ERR_CERROR_ILL_IDX:
872                 /* Fallthrough */
873         default:
874                 break;
875         }
876
877         /*
878          * We may have concurrent producers, so we need to be careful
879          * not to touch any of the shadow cmdq state.
880          */
881         queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
882         dev_err(smmu->dev, "skipping command in error state:\n");
883         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
884                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
885
886         /* Convert the erroneous command into a CMD_SYNC */
887         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
888                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
889                 return;
890         }
891
892         queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
893 }
894
895 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
896                                     struct arm_smmu_cmdq_ent *ent)
897 {
898         u32 until;
899         u64 cmd[CMDQ_ENT_DWORDS];
900         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
901         struct arm_smmu_queue *q = &smmu->cmdq.q;
902
903         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
904                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
905                          ent->opcode);
906                 return;
907         }
908
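        /*
         * Insertion is serialised by the cmdq lock. CMD_SYNC completion is
         * detected by polling (or WFE on) the consumer pointer rather than
         * by taking the CMD_SYNC interrupt.
         */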
909         spin_lock(&smmu->cmdq.lock);
910         while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
911                 /*
912                  * Keep the queue locked, otherwise the producer could wrap
913                  * twice and we could see a future consumer pointer that looks
914                  * like it's behind us.
915                  */
916                 if (queue_poll_cons(q, until, wfe))
917                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
918         }
919
920         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
921                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
922         spin_unlock(&smmu->cmdq.lock);
923 }
924
925 /* Context descriptor manipulation functions */
926 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
927 {
928         u64 val = 0;
929
930         /* Repack the TCR. Just care about TTBR0 for now */
931         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
932         val |= ARM_SMMU_TCR2CD(tcr, TG0);
933         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
934         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
935         val |= ARM_SMMU_TCR2CD(tcr, SH0);
936         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
937         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
938         val |= ARM_SMMU_TCR2CD(tcr, IPS);
939         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
940
941         return val;
942 }
943
944 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
945                                     struct arm_smmu_s1_cfg *cfg)
946 {
947         u64 val;
948
949         /*
950          * We don't need to issue any invalidation here, as we'll invalidate
951          * the STE when installing the new entry anyway.
952          */
953         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
954 #ifdef __BIG_ENDIAN
955               CTXDESC_CD_0_ENDI |
956 #endif
957               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
958               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
959               CTXDESC_CD_0_V;
960         cfg->cdptr[0] = cpu_to_le64(val);
961
962         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
963         cfg->cdptr[1] = cpu_to_le64(val);
964
965         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
966 }
967
968 /* Stream table manipulation functions */
969 static void
970 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
971 {
972         u64 val = 0;
973
974         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
975                 << STRTAB_L1_DESC_SPAN_SHIFT;
976         val |= desc->l2ptr_dma &
977                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
978
979         *dst = cpu_to_le64(val);
980 }
981
982 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
983 {
984         struct arm_smmu_cmdq_ent cmd = {
985                 .opcode = CMDQ_OP_CFGI_STE,
986                 .cfgi   = {
987                         .sid    = sid,
988                         .leaf   = true,
989                 },
990         };
991
992         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
993         cmd.opcode = CMDQ_OP_CMD_SYNC;
994         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
995 }
996
997 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
998                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
999 {
1000         /*
1001          * This is hideously complicated, but we only really care about
1002          * three cases at the moment:
1003          *
1004          * 1. Invalid (all zero) -> bypass  (init)
1005          * 2. Bypass -> translation (attach)
1006          * 3. Translation -> bypass (detach)
1007          *
1008          * Given that we can't update the STE atomically and the SMMU
1009          * doesn't read the thing in a defined order, that leaves us
1010          * with the following maintenance requirements:
1011          *
1012          * 1. Update Config, return (init time STEs aren't live)
1013          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1014          * 3. Update Config, sync
1015          */
1016         u64 val = le64_to_cpu(dst[0]);
1017         bool ste_live = false;
1018         struct arm_smmu_cmdq_ent prefetch_cmd = {
1019                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1020                 .prefetch       = {
1021                         .sid    = sid,
1022                 },
1023         };
1024
1025         if (val & STRTAB_STE_0_V) {
1026                 u64 cfg;
1027
1028                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1029                 switch (cfg) {
1030                 case STRTAB_STE_0_CFG_BYPASS:
1031                         break;
1032                 case STRTAB_STE_0_CFG_S1_TRANS:
1033                 case STRTAB_STE_0_CFG_S2_TRANS:
1034                         ste_live = true;
1035                         break;
1036                 default:
1037                         BUG(); /* STE corruption */
1038                 }
1039         }
1040
1041         /* Nuke the existing Config, as we're going to rewrite it */
1042         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1043
1044         if (ste->valid)
1045                 val |= STRTAB_STE_0_V;
1046         else
1047                 val &= ~STRTAB_STE_0_V;
1048
1049         if (ste->bypass) {
1050                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1051                                       : STRTAB_STE_0_CFG_BYPASS;
1052                 dst[0] = cpu_to_le64(val);
1053                 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1054                          << STRTAB_STE_1_SHCFG_SHIFT);
1055                 dst[2] = 0; /* Nuke the VMID */
1056                 if (ste_live)
1057                         arm_smmu_sync_ste_for_sid(smmu, sid);
1058                 return;
1059         }
1060
1061         if (ste->s1_cfg) {
1062                 BUG_ON(ste_live);
1063                 dst[1] = cpu_to_le64(
1064                          STRTAB_STE_1_S1C_CACHE_WBRA
1065                          << STRTAB_STE_1_S1CIR_SHIFT |
1066                          STRTAB_STE_1_S1C_CACHE_WBRA
1067                          << STRTAB_STE_1_S1COR_SHIFT |
1068                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1069 #ifdef CONFIG_PCI_ATS
1070                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1071 #endif
1072                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1073
1074                 if (smmu->features & ARM_SMMU_FEAT_STALLS)
1075                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1076
1077                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1078                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1079                         STRTAB_STE_0_CFG_S1_TRANS;
1080
1081         }
1082
1083         if (ste->s2_cfg) {
1084                 BUG_ON(ste_live);
1085                 dst[2] = cpu_to_le64(
1086                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1087                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1088                           << STRTAB_STE_2_VTCR_SHIFT |
1089 #ifdef __BIG_ENDIAN
1090                          STRTAB_STE_2_S2ENDI |
1091 #endif
1092                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1093                          STRTAB_STE_2_S2R);
1094
1095                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1096                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1097
1098                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1099         }
1100
1101         arm_smmu_sync_ste_for_sid(smmu, sid);
1102         dst[0] = cpu_to_le64(val);
1103         arm_smmu_sync_ste_for_sid(smmu, sid);
1104
1105         /* It's likely that we'll want to use the new STE soon */
1106         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1107                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1108 }
1109
1110 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1111 {
1112         unsigned int i;
1113         struct arm_smmu_strtab_ent ste = {
1114                 .valid  = true,
1115                 .bypass = true,
1116         };
1117
1118         for (i = 0; i < nent; ++i) {
1119                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1120                 strtab += STRTAB_STE_DWORDS;
1121         }
1122 }
1123
1124 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1125 {
1126         size_t size;
1127         void *strtab;
1128         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1129         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1130
1131         if (desc->l2ptr)
1132                 return 0;
1133
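        /* 1 << STRTAB_SPLIT STEs per level-2 table, STRTAB_STE_DWORDS * 8 bytes each */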
1134         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1135         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1136
1137         desc->span = STRTAB_SPLIT + 1;
1138         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1139                                           GFP_KERNEL | __GFP_ZERO);
1140         if (!desc->l2ptr) {
1141                 dev_err(smmu->dev,
1142                         "failed to allocate l2 stream table for SID %u\n",
1143                         sid);
1144                 return -ENOMEM;
1145         }
1146
1147         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1148         arm_smmu_write_strtab_l1_desc(strtab, desc);
1149         return 0;
1150 }
1151
1152 /* IRQ and event handlers */
1153 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1154 {
1155         int i;
1156         struct arm_smmu_device *smmu = dev;
1157         struct arm_smmu_queue *q = &smmu->evtq.q;
1158         u64 evt[EVTQ_ENT_DWORDS];
1159
1160         while (!queue_remove_raw(q, evt)) {
1161                 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1162
1163                 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1164                 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1165                         dev_info(smmu->dev, "\t0x%016llx\n",
1166                                  (unsigned long long)evt[i]);
1167         }
1168
1169         /* Sync our overflow flag, as we believe we're up to speed */
1170         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1171         return IRQ_HANDLED;
1172 }
1173
1174 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1175 {
1176         irqreturn_t ret = IRQ_WAKE_THREAD;
1177         struct arm_smmu_device *smmu = dev;
1178         struct arm_smmu_queue *q = &smmu->evtq.q;
1179
1180         /*
1181          * Not much we can do on overflow, so scream and pretend we're
1182          * trying harder.
1183          */
1184         if (queue_sync_prod(q) == -EOVERFLOW)
1185                 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1186         else if (queue_empty(q))
1187                 ret = IRQ_NONE;
1188
1189         return ret;
1190 }
1191
1192 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1193 {
1194         struct arm_smmu_device *smmu = dev;
1195         struct arm_smmu_queue *q = &smmu->priq.q;
1196         u64 evt[PRIQ_ENT_DWORDS];
1197
1198         while (!queue_remove_raw(q, evt)) {
1199                 u32 sid, ssid;
1200                 u16 grpid;
1201                 bool ssv, last;
1202
1203                 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1204                 ssv = evt[0] & PRIQ_0_SSID_V;
1205                 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1206                 last = evt[0] & PRIQ_0_PRG_LAST;
1207                 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1208
1209                 dev_info(smmu->dev, "unexpected PRI request received:\n");
1210                 dev_info(smmu->dev,
1211                          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1212                          sid, ssid, grpid, last ? "L" : "",
1213                          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1214                          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1215                          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1216                          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1217                          evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1218
1219                 if (last) {
1220                         struct arm_smmu_cmdq_ent cmd = {
1221                                 .opcode                 = CMDQ_OP_PRI_RESP,
1222                                 .substream_valid        = ssv,
1223                                 .pri                    = {
1224                                         .sid    = sid,
1225                                         .ssid   = ssid,
1226                                         .grpid  = grpid,
1227                                         .resp   = PRI_RESP_DENY,
1228                                 },
1229                         };
1230
1231                         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1232                 }
1233         }
1234
1235         /* Sync our overflow flag, as we believe we're up to speed */
1236         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1237         return IRQ_HANDLED;
1238 }
1239
1240 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1241 {
1242         irqreturn_t ret = IRQ_WAKE_THREAD;
1243         struct arm_smmu_device *smmu = dev;
1244         struct arm_smmu_queue *q = &smmu->priq.q;
1245
1246         /* PRIQ overflow indicates a programming error */
1247         if (queue_sync_prod(q) == -EOVERFLOW)
1248                 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1249         else if (queue_empty(q))
1250                 ret = IRQ_NONE;
1251
1252         return ret;
1253 }
1254
1255 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1256 {
1257         /* We don't actually use CMD_SYNC interrupts for anything */
1258         return IRQ_HANDLED;
1259 }
1260
1261 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1262
1263 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1264 {
1265         u32 gerror, gerrorn, active;
1266         struct arm_smmu_device *smmu = dev;
1267
1268         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1269         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1270
1271         active = gerror ^ gerrorn;
1272         if (!(active & GERROR_ERR_MASK))
1273                 return IRQ_NONE; /* No errors pending */
1274
1275         dev_warn(smmu->dev,
1276                  "unexpected global error reported (0x%08x), this could be serious\n",
1277                  active);
1278
1279         if (active & GERROR_SFM_ERR) {
1280                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1281                 arm_smmu_device_disable(smmu);
1282         }
1283
1284         if (active & GERROR_MSI_GERROR_ABT_ERR)
1285                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1286
1287         if (active & GERROR_MSI_PRIQ_ABT_ERR) {
1288                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1289                 arm_smmu_priq_handler(irq, smmu->dev);
1290         }
1291
1292         if (active & GERROR_MSI_EVTQ_ABT_ERR) {
1293                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1294                 arm_smmu_evtq_handler(irq, smmu->dev);
1295         }
1296
1297         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1298                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1299                 arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1300         }
1301
1302         if (active & GERROR_PRIQ_ABT_ERR)
1303                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1304
1305         if (active & GERROR_EVTQ_ABT_ERR)
1306                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1307
1308         if (active & GERROR_CMDQ_ERR)
1309                 arm_smmu_cmdq_skip_err(smmu);
1310
1311         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1312         return IRQ_HANDLED;
1313 }
1314
1315 /* IO_PGTABLE API */
1316 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1317 {
1318         struct arm_smmu_cmdq_ent cmd;
1319
1320         cmd.opcode = CMDQ_OP_CMD_SYNC;
1321         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1322 }
1323
1324 static void arm_smmu_tlb_sync(void *cookie)
1325 {
1326         struct arm_smmu_domain *smmu_domain = cookie;
1327         __arm_smmu_tlb_sync(smmu_domain->smmu);
1328 }
1329
1330 static void arm_smmu_tlb_inv_context(void *cookie)
1331 {
1332         struct arm_smmu_domain *smmu_domain = cookie;
1333         struct arm_smmu_device *smmu = smmu_domain->smmu;
1334         struct arm_smmu_cmdq_ent cmd;
1335
1336         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1337                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1338                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1339                 cmd.tlbi.vmid   = 0;
1340         } else {
1341                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1342                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1343         }
1344
1345         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1346         __arm_smmu_tlb_sync(smmu);
1347 }
1348
1349 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1350                                           size_t granule, bool leaf, void *cookie)
1351 {
1352         struct arm_smmu_domain *smmu_domain = cookie;
1353         struct arm_smmu_device *smmu = smmu_domain->smmu;
1354         struct arm_smmu_cmdq_ent cmd = {
1355                 .tlbi = {
1356                         .leaf   = leaf,
1357                         .addr   = iova,
1358                 },
1359         };
1360
1361         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1362                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1363                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1364         } else {
1365                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1366                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1367         }
1368
1369         do {
1370                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1371                 cmd.tlbi.addr += granule;
1372         } while (size -= granule);
1373 }
1374
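/* TLB maintenance hooks invoked by the io-pgtable library */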
1375 static struct iommu_gather_ops arm_smmu_gather_ops = {
1376         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1377         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1378         .tlb_sync       = arm_smmu_tlb_sync,
1379 };
1380
1381 /* IOMMU API */
1382 static bool arm_smmu_capable(enum iommu_cap cap)
1383 {
1384         switch (cap) {
1385         case IOMMU_CAP_CACHE_COHERENCY:
1386                 return true;
1387         case IOMMU_CAP_INTR_REMAP:
1388                 return true; /* MSIs are just memory writes */
1389         case IOMMU_CAP_NOEXEC:
1390                 return true;
1391         default:
1392                 return false;
1393         }
1394 }
1395
1396 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1397 {
1398         struct arm_smmu_domain *smmu_domain;
1399
1400         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1401                 return NULL;
1402
1403         /*
1404          * Allocate the domain and initialise some of its data structures.
1405          * We can't really do anything meaningful until we've added a
1406          * master.
1407          */
1408         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1409         if (!smmu_domain)
1410                 return NULL;
1411
1412         if (type == IOMMU_DOMAIN_DMA &&
1413             iommu_get_dma_cookie(&smmu_domain->domain)) {
1414                 kfree(smmu_domain);
1415                 return NULL;
1416         }
1417
1418         mutex_init(&smmu_domain->init_mutex);
1419         spin_lock_init(&smmu_domain->pgtbl_lock);
1420         return &smmu_domain->domain;
1421 }
1422
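/*
 * Allocate a free ASID/VMID from the bitmap. test_and_set_bit() makes the
 * claim atomic, so simply retry if another CPU grabs the bit first.
 */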
1423 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1424 {
1425         int idx, size = 1 << span;
1426
1427         do {
1428                 idx = find_first_zero_bit(map, size);
1429                 if (idx == size)
1430                         return -ENOSPC;
1431         } while (test_and_set_bit(idx, map));
1432
1433         return idx;
1434 }
1435
1436 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1437 {
1438         clear_bit(idx, map);
1439 }
1440
1441 static void arm_smmu_domain_free(struct iommu_domain *domain)
1442 {
1443         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1444         struct arm_smmu_device *smmu = smmu_domain->smmu;
1445
1446         iommu_put_dma_cookie(domain);
1447         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1448
1449         /* Free the CD and ASID, if we allocated them */
1450         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1451                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1452
1453                 if (cfg->cdptr) {
1454                         dmam_free_coherent(smmu->dev,
1455                                            CTXDESC_CD_DWORDS << 3,
1456                                            cfg->cdptr,
1457                                            cfg->cdptr_dma);
1458
1459                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1460                 }
1461         } else {
1462                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1463                 if (cfg->vmid)
1464                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1465         }
1466
1467         kfree(smmu_domain);
1468 }
1469
1470 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1471                                        struct io_pgtable_cfg *pgtbl_cfg)
1472 {
1473         int ret;
1474         int asid;
1475         struct arm_smmu_device *smmu = smmu_domain->smmu;
1476         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1477
1478         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1479         if (asid < 0)
1480                 return asid;
1481
1482         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1483                                          &cfg->cdptr_dma,
1484                                          GFP_KERNEL | __GFP_ZERO);
1485         if (!cfg->cdptr) {
1486                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1487                 ret = -ENOMEM;
1488                 goto out_free_asid;
1489         }
1490
1491         cfg->cd.asid    = (u16)asid;
1492         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1493         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1494         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1495         return 0;
1496
1497 out_free_asid:
1498         arm_smmu_bitmap_free(smmu->asid_map, asid);
1499         return ret;
1500 }
1501
1502 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1503                                        struct io_pgtable_cfg *pgtbl_cfg)
1504 {
1505         int vmid;
1506         struct arm_smmu_device *smmu = smmu_domain->smmu;
1507         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1508
1509         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1510         if (vmid < 0)
1511                 return vmid;
1512
1513         cfg->vmid       = (u16)vmid;
1514         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1515         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1516         return 0;
1517 }
1518
1519 static struct iommu_ops arm_smmu_ops;
1520
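/*
 * Editor's note (illustrative, not part of the original source):
 * arm_smmu_domain_finalise() below runs on the first attach. It clamps the
 * requested stage to what the SMMU supports (a stage-1 request on a
 * stage-2-only SMMU is quietly converted, and vice versa), picks the
 * io-pgtable format and input/output address sizes to match, allocates the
 * page-table ops, and finally calls the stage-specific helper to set up
 * either the ASID + context descriptor or the VMID + VTTBR/VTCR.
 */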
1521 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1522 {
1523         int ret;
1524         unsigned long ias, oas;
1525         enum io_pgtable_fmt fmt;
1526         struct io_pgtable_cfg pgtbl_cfg;
1527         struct io_pgtable_ops *pgtbl_ops;
1528         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1529                                  struct io_pgtable_cfg *);
1530         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1531         struct arm_smmu_device *smmu = smmu_domain->smmu;
1532
1533         /* Restrict the stage to what we can actually support */
1534         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1535                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1536         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1537                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1538
1539         switch (smmu_domain->stage) {
1540         case ARM_SMMU_DOMAIN_S1:
1541                 ias = VA_BITS;
1542                 oas = smmu->ias;
1543                 fmt = ARM_64_LPAE_S1;
1544                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1545                 break;
1546         case ARM_SMMU_DOMAIN_NESTED:
1547         case ARM_SMMU_DOMAIN_S2:
1548                 ias = smmu->ias;
1549                 oas = smmu->oas;
1550                 fmt = ARM_64_LPAE_S2;
1551                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1552                 break;
1553         default:
1554                 return -EINVAL;
1555         }
1556
1557         pgtbl_cfg = (struct io_pgtable_cfg) {
1558                 .pgsize_bitmap  = arm_smmu_ops.pgsize_bitmap,
1559                 .ias            = ias,
1560                 .oas            = oas,
1561                 .tlb            = &arm_smmu_gather_ops,
1562                 .iommu_dev      = smmu->dev,
1563         };
1564
1565         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1566         if (!pgtbl_ops)
1567                 return -ENOMEM;
1568
1569         arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1570         smmu_domain->pgtbl_ops = pgtbl_ops;
1571
1572         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1573         if (ret)
1574                 free_io_pgtable_ops(pgtbl_ops);
1575
1576         return ret;
1577 }
1578
1579 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1580 {
1581         struct iommu_group *group;
1582         struct arm_smmu_group *smmu_group;
1583
1584         group = iommu_group_get(dev);
1585         if (!group)
1586                 return NULL;
1587
1588         smmu_group = iommu_group_get_iommudata(group);
1589         iommu_group_put(group);
1590         return smmu_group;
1591 }
1592
1593 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1594 {
1595         __le64 *step;
1596         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1597
1598         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1599                 struct arm_smmu_strtab_l1_desc *l1_desc;
1600                 int idx;
1601
1602                 /* Two-level walk */
1603                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1604                 l1_desc = &cfg->l1_desc[idx];
1605                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1606                 step = &l1_desc->l2ptr[idx];
1607         } else {
1608                 /* Simple linear lookup */
1609                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1610         }
1611
1612         return step;
1613 }
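/*
 * Editor's note (worked example, not part of the original source): assuming
 * STRTAB_SPLIT is 8 and an L1 descriptor is a single 64-bit word, as defined
 * earlier in this file, SID 0x1234 is split into L1 index 0x12
 * (sid >> STRTAB_SPLIT) and L2 index 0x34 (sid & 0xff); the returned step
 * points at the first of the STRTAB_STE_DWORDS dwords that make up STE 0x34
 * in that descriptor's l2ptr array. The multiplication by
 * STRTAB_L1_DESC_DWORDS is a no-op for a one-dword descriptor but keeps the
 * dword-based indexing explicit.
 */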
1614
1615 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1616 {
1617         int i;
1618         struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1619         struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1620         struct arm_smmu_device *smmu = smmu_group->smmu;
1621
1622         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1623                 ste->s1_cfg = &smmu_domain->s1_cfg;
1624                 ste->s2_cfg = NULL;
1625                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1626         } else {
1627                 ste->s1_cfg = NULL;
1628                 ste->s2_cfg = &smmu_domain->s2_cfg;
1629         }
1630
1631         for (i = 0; i < smmu_group->num_sids; ++i) {
1632                 u32 sid = smmu_group->sids[i];
1633                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1634
1635                 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1636         }
1637
1638         return 0;
1639 }
1640
1641 static void arm_smmu_detach_dev(struct device *dev)
1642 {
1643         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1644
1645         smmu_group->ste.bypass = true;
1646         if (arm_smmu_install_ste_for_group(smmu_group))
1647                 dev_warn(dev, "failed to install bypass STE\n");
1648
1649         smmu_group->domain = NULL;
1650 }
1651
1652 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1653 {
1654         int ret = 0;
1655         struct arm_smmu_device *smmu;
1656         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1657         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1658
1659         if (!smmu_group)
1660                 return -ENOENT;
1661
1662         /* Already attached to a different domain? */
1663         if (smmu_group->domain && smmu_group->domain != smmu_domain)
1664                 arm_smmu_detach_dev(dev);
1665
1666         smmu = smmu_group->smmu;
1667         mutex_lock(&smmu_domain->init_mutex);
1668
1669         if (!smmu_domain->smmu) {
1670                 smmu_domain->smmu = smmu;
1671                 ret = arm_smmu_domain_finalise(domain);
1672                 if (ret) {
1673                         smmu_domain->smmu = NULL;
1674                         goto out_unlock;
1675                 }
1676         } else if (smmu_domain->smmu != smmu) {
1677                 dev_err(dev,
1678                         "cannot attach to SMMU %s (upstream of %s)\n",
1679                         dev_name(smmu_domain->smmu->dev),
1680                         dev_name(smmu->dev));
1681                 ret = -ENXIO;
1682                 goto out_unlock;
1683         }
1684
1685         /* Group already attached to this domain? */
1686         if (smmu_group->domain)
1687                 goto out_unlock;
1688
1689         smmu_group->domain      = smmu_domain;
1690
1691         /*
1692          * FIXME: This should always be "false" once we have IOMMU-backed
1693          * DMA ops for all devices behind the SMMU.
1694          */
1695         smmu_group->ste.bypass  = domain->type == IOMMU_DOMAIN_DMA;
1696
1697         ret = arm_smmu_install_ste_for_group(smmu_group);
1698         if (ret)
1699                 smmu_group->domain = NULL;
1700
1701 out_unlock:
1702         mutex_unlock(&smmu_domain->init_mutex);
1703         return ret;
1704 }
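/*
 * Editor's note (illustrative, not part of the original source): the first
 * attach of a domain finalises it against the SMMU upstream of the group
 * (allocating the io-pgtable and the ASID or VMID); subsequent attaches of
 * the same domain only install STEs for the new group's stream IDs. A group
 * that is already bound to a different domain is first detached, which
 * rewrites its STEs as bypass.
 */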
1705
1706 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1707                         phys_addr_t paddr, size_t size, int prot)
1708 {
1709         int ret;
1710         unsigned long flags;
1711         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1712         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1713
1714         if (!ops)
1715                 return -ENODEV;
1716
1717         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1718         ret = ops->map(ops, iova, paddr, size, prot);
1719         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1720         return ret;
1721 }
1722
1723 static size_t
1724 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1725 {
1726         size_t ret;
1727         unsigned long flags;
1728         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1729         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1730
1731         if (!ops)
1732                 return 0;
1733
1734         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1735         ret = ops->unmap(ops, iova, size);
1736         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1737         return ret;
1738 }
1739
1740 static phys_addr_t
1741 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1742 {
1743         phys_addr_t ret;
1744         unsigned long flags;
1745         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1746         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1747
1748         if (!ops)
1749                 return 0;
1750
1751         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1752         ret = ops->iova_to_phys(ops, iova);
1753         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1754
1755         return ret;
1756 }
1757
1758 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1759 {
1760         *(u32 *)sidp = alias;
1761         return 0; /* Continue walking */
1762 }
1763
1764 static void __arm_smmu_release_pci_iommudata(void *data)
1765 {
1766         kfree(data);
1767 }
1768
1769 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1770 {
1771         struct device_node *of_node;
1772         struct platform_device *smmu_pdev;
1773         struct arm_smmu_device *smmu = NULL;
1774         struct pci_bus *bus = pdev->bus;
1775
1776         /* Walk up to the root bus */
1777         while (!pci_is_root_bus(bus))
1778                 bus = bus->parent;
1779
1780         /* Follow the "iommus" phandle from the host controller */
1781         of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1782         if (!of_node)
1783                 return NULL;
1784
1785         /* See if we can find an SMMU corresponding to the phandle */
1786         smmu_pdev = of_find_device_by_node(of_node);
1787         if (smmu_pdev)
1788                 smmu = platform_get_drvdata(smmu_pdev);
1789
1790         of_node_put(of_node);
1791         return smmu;
1792 }
1793
1794 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1795 {
1796         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1797
1798         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1799                 limit *= 1UL << STRTAB_SPLIT;
1800
1801         return sid < limit;
1802 }
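/*
 * Editor's note (illustrative, not part of the original source): for a
 * linear stream table num_l1_ents is already 1 << sid_bits, so the limit is
 * the full SID space; with the two-level format each L1 entry covers
 * 1 << STRTAB_SPLIT STEs, so, assuming an 8-bit split, 256 L1 entries cover
 * SIDs 0..65535.
 */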
1803
1804 static int arm_smmu_add_device(struct device *dev)
1805 {
1806         int i, ret;
1807         u32 sid, *sids;
1808         struct pci_dev *pdev;
1809         struct iommu_group *group;
1810         struct arm_smmu_group *smmu_group;
1811         struct arm_smmu_device *smmu;
1812
1813         /* We only support PCI, for now */
1814         if (!dev_is_pci(dev))
1815                 return -ENODEV;
1816
1817         pdev = to_pci_dev(dev);
1818         group = iommu_group_get_for_dev(dev);
1819         if (IS_ERR(group))
1820                 return PTR_ERR(group);
1821
1822         smmu_group = iommu_group_get_iommudata(group);
1823         if (!smmu_group) {
1824                 smmu = arm_smmu_get_for_pci_dev(pdev);
1825                 if (!smmu) {
1826                         ret = -ENOENT;
1827                         goto out_remove_dev;
1828                 }
1829
1830                 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1831                 if (!smmu_group) {
1832                         ret = -ENOMEM;
1833                         goto out_remove_dev;
1834                 }
1835
1836                 smmu_group->ste.valid   = true;
1837                 smmu_group->smmu        = smmu;
1838                 iommu_group_set_iommudata(group, smmu_group,
1839                                           __arm_smmu_release_pci_iommudata);
1840         } else {
1841                 smmu = smmu_group->smmu;
1842         }
1843
1844         /* Assume SID == RID until firmware tells us otherwise */
1845         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1846         for (i = 0; i < smmu_group->num_sids; ++i) {
1847                 /* If we already know about this SID, then we're done */
1848                 if (smmu_group->sids[i] == sid)
1849                         goto out_put_group;
1850         }
1851
1852         /* Check the SID is in range of the SMMU and our stream table */
1853         if (!arm_smmu_sid_in_range(smmu, sid)) {
1854                 ret = -ERANGE;
1855                 goto out_remove_dev;
1856         }
1857
1858         /* Ensure l2 strtab is initialised */
1859         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1860                 ret = arm_smmu_init_l2_strtab(smmu, sid);
1861                 if (ret)
1862                         goto out_remove_dev;
1863         }
1864
1865         /* Resize the SID array for the group */
1866         smmu_group->num_sids++;
1867         sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1868                         GFP_KERNEL);
1869         if (!sids) {
1870                 smmu_group->num_sids--;
1871                 ret = -ENOMEM;
1872                 goto out_remove_dev;
1873         }
1874
1875         /* Add the new SID */
1876         sids[smmu_group->num_sids - 1] = sid;
1877         smmu_group->sids = sids;
1878
1879 out_put_group:
1880         iommu_group_put(group);
1881         return 0;
1882
1883 out_remove_dev:
1884         iommu_group_remove_device(dev);
1885         iommu_group_put(group);
1886         return ret;
1887 }
1888
1889 static void arm_smmu_remove_device(struct device *dev)
1890 {
1891         iommu_group_remove_device(dev);
1892 }
1893
1894 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1895                                     enum iommu_attr attr, void *data)
1896 {
1897         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1898
1899         switch (attr) {
1900         case DOMAIN_ATTR_NESTING:
1901                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1902                 return 0;
1903         default:
1904                 return -ENODEV;
1905         }
1906 }
1907
1908 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1909                                     enum iommu_attr attr, void *data)
1910 {
1911         int ret = 0;
1912         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1913
1914         mutex_lock(&smmu_domain->init_mutex);
1915
1916         switch (attr) {
1917         case DOMAIN_ATTR_NESTING:
1918                 if (smmu_domain->smmu) {
1919                         ret = -EPERM;
1920                         goto out_unlock;
1921                 }
1922
1923                 if (*(int *)data)
1924                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1925                 else
1926                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1927
1928                 break;
1929         default:
1930                 ret = -ENODEV;
1931         }
1932
1933 out_unlock:
1934         mutex_unlock(&smmu_domain->init_mutex);
1935         return ret;
1936 }
1937
1938 static struct iommu_ops arm_smmu_ops = {
1939         .capable                = arm_smmu_capable,
1940         .domain_alloc           = arm_smmu_domain_alloc,
1941         .domain_free            = arm_smmu_domain_free,
1942         .attach_dev             = arm_smmu_attach_dev,
1943         .map                    = arm_smmu_map,
1944         .unmap                  = arm_smmu_unmap,
1945         .iova_to_phys           = arm_smmu_iova_to_phys,
1946         .add_device             = arm_smmu_add_device,
1947         .remove_device          = arm_smmu_remove_device,
1948         .device_group           = pci_device_group,
1949         .domain_get_attr        = arm_smmu_domain_get_attr,
1950         .domain_set_attr        = arm_smmu_domain_set_attr,
1951         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1952 };
1953
1954 /* Probing and initialisation functions */
1955 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1956                                    struct arm_smmu_queue *q,
1957                                    unsigned long prod_off,
1958                                    unsigned long cons_off,
1959                                    size_t dwords)
1960 {
1961         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1962
1963         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1964         if (!q->base) {
1965                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1966                         qsz);
1967                 return -ENOMEM;
1968         }
1969
1970         q->prod_reg     = smmu->base + prod_off;
1971         q->cons_reg     = smmu->base + cons_off;
1972         q->ent_dwords   = dwords;
1973
1974         q->q_base  = Q_BASE_RWA;
1975         q->q_base |= q->base_dma & (Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT);
1976         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1977                      << Q_BASE_LOG2SIZE_SHIFT;
1978
1979         q->prod = q->cons = 0;
1980         return 0;
1981 }
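/*
 * Editor's note (worked example, not part of the original source): a queue
 * holds (1 << max_n_shift) entries of 'dwords' 64-bit words each, so qsz is
 * that product shifted left by 3 to convert to bytes. Assuming
 * CMDQ_ENT_DWORDS is 2 and max_n_shift is capped at 8 by CMDQ_MAX_SZ_SHIFT,
 * the command queue is (256 * 2) << 3 = 4096 bytes, i.e. a single 4K page
 * of coherent memory.
 */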
1982
1983 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1984 {
1985         int ret;
1986
1987         /* cmdq */
1988         spin_lock_init(&smmu->cmdq.lock);
1989         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1990                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1991         if (ret)
1992                 return ret;
1993
1994         /* evtq */
1995         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1996                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1997         if (ret)
1998                 return ret;
1999
2000         /* priq */
2001         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2002                 return 0;
2003
2004         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2005                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2006 }
2007
2008 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2009 {
2010         unsigned int i;
2011         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2012         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2013         void *strtab = smmu->strtab_cfg.strtab;
2014
2015         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2016         if (!cfg->l1_desc) {
2017                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2018                 return -ENOMEM;
2019         }
2020
2021         for (i = 0; i < cfg->num_l1_ents; ++i) {
2022                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2023                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2024         }
2025
2026         return 0;
2027 }
2028
2029 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2030 {
2031         void *strtab;
2032         u64 reg;
2033         u32 size, l1size;
2034         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2035
2036         /*
2037          * If we can resolve everything with a single L2 table, then we
2038          * just need a single L1 descriptor. Otherwise, calculate the L1
2039          * size, capped to the SIDSIZE.
2040          */
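        /*
         * Editor's note (worked example, not part of the original source):
         * assuming an 8-bit STRTAB_SPLIT and STRTAB_L1_SZ_SHIFT - 3 >= 8, an
         * SMMU reporting sid_bits == 16 gets size = min(..., 16 - 8) = 8
         * below: 256 one-dword L1 descriptors (2KB), each covering 256 STEs,
         * so the whole 16-bit SID space is reachable and no truncation
         * warning is printed.
         */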
2041         if (smmu->sid_bits < STRTAB_SPLIT) {
2042                 size = 0;
2043         } else {
2044                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2045                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2046         }
2047         cfg->num_l1_ents = 1 << size;
2048
2049         size += STRTAB_SPLIT;
2050         if (size < smmu->sid_bits)
2051                 dev_warn(smmu->dev,
2052                          "2-level strtab only covers %u/%u bits of SID\n",
2053                          size, smmu->sid_bits);
2054
2055         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2056         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2057                                      GFP_KERNEL | __GFP_ZERO);
2058         if (!strtab) {
2059                 dev_err(smmu->dev,
2060                         "failed to allocate l1 stream table (%u bytes)\n",
2061                         l1size);
2062                 return -ENOMEM;
2063         }
2064         cfg->strtab = strtab;
2065
2066         /* Configure strtab_base_cfg for 2 levels */
2067         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2068         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2069                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2070         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2071                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2072         cfg->strtab_base_cfg = reg;
2073
2074         return arm_smmu_init_l1_strtab(smmu);
2075 }
2076
2077 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2078 {
2079         void *strtab;
2080         u64 reg;
2081         u32 size;
2082         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2083
2084         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2085         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2086                                      GFP_KERNEL | __GFP_ZERO);
2087         if (!strtab) {
2088                 dev_err(smmu->dev,
2089                         "failed to allocate linear stream table (%u bytes)\n",
2090                         size);
2091                 return -ENOMEM;
2092         }
2093         cfg->strtab = strtab;
2094         cfg->num_l1_ents = 1 << smmu->sid_bits;
2095
2096         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2097         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2098         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2099                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2100         cfg->strtab_base_cfg = reg;
2101
2102         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2103         return 0;
2104 }
2105
2106 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2107 {
2108         u64 reg;
2109         int ret;
2110
2111         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2112                 ret = arm_smmu_init_strtab_2lvl(smmu);
2113         else
2114                 ret = arm_smmu_init_strtab_linear(smmu);
2115
2116         if (ret)
2117                 return ret;
2118
2119         /* Set the strtab base address */
2120         reg  = smmu->strtab_cfg.strtab_dma &
2121                (STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT);
2122         reg |= STRTAB_BASE_RA;
2123         smmu->strtab_cfg.strtab_base = reg;
2124
2125         /* Allocate the first VMID for stage-2 bypass STEs */
2126         set_bit(0, smmu->vmid_map);
2127         return 0;
2128 }
2129
2130 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2131 {
2132         int ret;
2133
2134         ret = arm_smmu_init_queues(smmu);
2135         if (ret)
2136                 return ret;
2137
2138         return arm_smmu_init_strtab(smmu);
2139 }
2140
2141 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2142                                    unsigned int reg_off, unsigned int ack_off)
2143 {
2144         u32 reg;
2145
2146         writel_relaxed(val, smmu->base + reg_off);
2147         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2148                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2149 }
2150
2151 static void arm_smmu_free_msis(void *data)
2152 {
2153         struct device *dev = data;
2154         platform_msi_domain_free_irqs(dev);
2155 }
2156
2157 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2158 {
2159         phys_addr_t doorbell;
2160         struct device *dev = msi_desc_to_dev(desc);
2161         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2162         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2163
2164         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2165         doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2166
2167         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2168         writel_relaxed(msg->data, smmu->base + cfg[1]);
2169         writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2170 }
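/*
 * Editor's note (illustrative, not part of the original source): the
 * doorbell is rebuilt from the MSI message as (address_hi << 32) |
 * address_lo and masked down to the address field the hardware accepts;
 * cfg[0]/cfg[1]/cfg[2] are the IRQ_CFG0/1/2 register offsets for the queue
 * or gerror interrupt being configured, carrying the doorbell address, the
 * MSI data payload and the Device-nGnRE memory attributes respectively.
 */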
2171
2172 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2173 {
2174         struct msi_desc *desc;
2175         int ret, nvec = ARM_SMMU_MAX_MSIS;
2176         struct device *dev = smmu->dev;
2177
2178         /* Clear the MSI address regs */
2179         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2180         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2181
2182         if (smmu->features & ARM_SMMU_FEAT_PRI)
2183                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2184         else
2185                 nvec--;
2186
2187         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2188                 return;
2189
2190         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2191         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2192         if (ret) {
2193                 dev_warn(dev, "failed to allocate MSIs\n");
2194                 return;
2195         }
2196
2197         for_each_msi_entry(desc, dev) {
2198                 switch (desc->platform.msi_index) {
2199                 case EVTQ_MSI_INDEX:
2200                         smmu->evtq.q.irq = desc->irq;
2201                         break;
2202                 case GERROR_MSI_INDEX:
2203                         smmu->gerr_irq = desc->irq;
2204                         break;
2205                 case PRIQ_MSI_INDEX:
2206                         smmu->priq.q.irq = desc->irq;
2207                         break;
2208                 default:        /* Unknown */
2209                         continue;
2210                 }
2211         }
2212
2213         /* Add callback to free MSIs on teardown */
2214         devm_add_action(dev, arm_smmu_free_msis, dev);
2215 }
2216
2217 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2218 {
2219         int ret, irq;
2220         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2221
2222         /* Disable IRQs first */
2223         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2224                                       ARM_SMMU_IRQ_CTRLACK);
2225         if (ret) {
2226                 dev_err(smmu->dev, "failed to disable irqs\n");
2227                 return ret;
2228         }
2229
2230         arm_smmu_setup_msis(smmu);
2231
2232         /* Request interrupt lines */
2233         irq = smmu->evtq.q.irq;
2234         if (irq) {
2235                 ret = devm_request_threaded_irq(smmu->dev, irq,
2236                                                 arm_smmu_evtq_handler,
2237                                                 arm_smmu_evtq_thread,
2238                                                 0, "arm-smmu-v3-evtq", smmu);
2239                 if (ret)
2240                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2241         }
2242
2243         irq = smmu->cmdq.q.irq;
2244         if (irq) {
2245                 ret = devm_request_irq(smmu->dev, irq,
2246                                        arm_smmu_cmdq_sync_handler, 0,
2247                                        "arm-smmu-v3-cmdq-sync", smmu);
2248                 if (ret)
2249                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2250         }
2251
2252         irq = smmu->gerr_irq;
2253         if (irq) {
2254                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2255                                        0, "arm-smmu-v3-gerror", smmu);
2256                 if (ret)
2257                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2258         }
2259
2260         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2261                 irq = smmu->priq.q.irq;
2262                 if (irq) {
2263                         ret = devm_request_threaded_irq(smmu->dev, irq,
2264                                                         arm_smmu_priq_handler,
2265                                                         arm_smmu_priq_thread,
2266                                                         0, "arm-smmu-v3-priq",
2267                                                         smmu);
2268                         if (ret)
2269                                 dev_warn(smmu->dev,
2270                                          "failed to enable priq irq\n");
2271                         else
2272                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2273                 }
2274         }
2275
2276         /* Enable interrupt generation on the SMMU */
2277         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2278                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2279         if (ret)
2280                 dev_warn(smmu->dev, "failed to enable irqs\n");
2281
2282         return 0;
2283 }
2284
2285 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2286 {
2287         int ret;
2288
2289         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2290         if (ret)
2291                 dev_err(smmu->dev, "failed to clear cr0\n");
2292
2293         return ret;
2294 }
2295
2296 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2297 {
2298         int ret;
2299         u32 reg, enables;
2300         struct arm_smmu_cmdq_ent cmd;
2301
2302         /* Clear CR0 and sync (disables SMMU and queue processing) */
2303         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2304         if (reg & CR0_SMMUEN)
2305                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2306
2307         ret = arm_smmu_device_disable(smmu);
2308         if (ret)
2309                 return ret;
2310
2311         /* CR1 (table and queue memory attributes) */
2312         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2313               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2314               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2315               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2316               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2317               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2318         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2319
2320         /* CR2 (private TLB maintenance, record invalid SIDs, EL2-E2H) */
2321         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2322         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2323
2324         /* Stream table */
2325         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2326                        smmu->base + ARM_SMMU_STRTAB_BASE);
2327         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2328                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2329
2330         /* Command queue */
2331         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2332         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2333         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2334
2335         enables = CR0_CMDQEN;
2336         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2337                                       ARM_SMMU_CR0ACK);
2338         if (ret) {
2339                 dev_err(smmu->dev, "failed to enable command queue\n");
2340                 return ret;
2341         }
2342
2343         /* Invalidate any cached configuration */
2344         cmd.opcode = CMDQ_OP_CFGI_ALL;
2345         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2346         cmd.opcode = CMDQ_OP_CMD_SYNC;
2347         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2348
2349         /* Invalidate any stale TLB entries */
2350         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2351                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2352                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2353         }
2354
2355         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2356         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2357         cmd.opcode = CMDQ_OP_CMD_SYNC;
2358         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2359
2360         /* Event queue */
2361         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2362         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2363         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2364
2365         enables |= CR0_EVTQEN;
2366         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2367                                       ARM_SMMU_CR0ACK);
2368         if (ret) {
2369                 dev_err(smmu->dev, "failed to enable event queue\n");
2370                 return ret;
2371         }
2372
2373         /* PRI queue */
2374         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2375                 writeq_relaxed(smmu->priq.q.q_base,
2376                                smmu->base + ARM_SMMU_PRIQ_BASE);
2377                 writel_relaxed(smmu->priq.q.prod,
2378                                smmu->base + ARM_SMMU_PRIQ_PROD);
2379                 writel_relaxed(smmu->priq.q.cons,
2380                                smmu->base + ARM_SMMU_PRIQ_CONS);
2381
2382                 enables |= CR0_PRIQEN;
2383                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2384                                               ARM_SMMU_CR0ACK);
2385                 if (ret) {
2386                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2387                         return ret;
2388                 }
2389         }
2390
2391         ret = arm_smmu_setup_irqs(smmu);
2392         if (ret) {
2393                 dev_err(smmu->dev, "failed to setup irqs\n");
2394                 return ret;
2395         }
2396
2397         /* Enable the SMMU interface */
2398         enables |= CR0_SMMUEN;
2399         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2400                                       ARM_SMMU_CR0ACK);
2401         if (ret) {
2402                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2403                 return ret;
2404         }
2405
2406         return 0;
2407 }
2408
2409 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2410 {
2411         u32 reg;
2412         bool coherent;
2413         unsigned long pgsize_bitmap = 0;
2414
2415         /* IDR0 */
2416         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2417
2418         /* 2-level structures */
2419         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2420                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2421
2422         if (reg & IDR0_CD2L)
2423                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2424
2425         /*
2426          * Translation table endianness.
2427          * We currently require the same endianness as the CPU, but this
2428          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2429          */
2430         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2431         case IDR0_TTENDIAN_MIXED:
2432                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2433                 break;
2434 #ifdef __BIG_ENDIAN
2435         case IDR0_TTENDIAN_BE:
2436                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2437                 break;
2438 #else
2439         case IDR0_TTENDIAN_LE:
2440                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2441                 break;
2442 #endif
2443         default:
2444                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2445                 return -ENXIO;
2446         }
2447
2448         /* Boolean feature flags */
2449         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2450                 smmu->features |= ARM_SMMU_FEAT_PRI;
2451
2452         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2453                 smmu->features |= ARM_SMMU_FEAT_ATS;
2454
2455         if (reg & IDR0_SEV)
2456                 smmu->features |= ARM_SMMU_FEAT_SEV;
2457
2458         if (reg & IDR0_MSI)
2459                 smmu->features |= ARM_SMMU_FEAT_MSI;
2460
2461         if (reg & IDR0_HYP)
2462                 smmu->features |= ARM_SMMU_FEAT_HYP;
2463
2464         /*
2465          * The dma-coherent property is used in preference to the ID
2466          * register, but warn on mismatch.
2467          */
2468         coherent = of_dma_is_coherent(smmu->dev->of_node);
2469         if (coherent)
2470                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2471
2472         if (!!(reg & IDR0_COHACC) != coherent)
2473                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2474                          coherent ? "true" : "false");
2475
2476         switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2477         case IDR0_STALL_MODEL_STALL:
2478                 /* Fallthrough */
2479         case IDR0_STALL_MODEL_FORCE:
2480                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2481         }
2482
2483         if (reg & IDR0_S1P)
2484                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2485
2486         if (reg & IDR0_S2P)
2487                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2488
2489         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2490                 dev_err(smmu->dev, "no translation support!\n");
2491                 return -ENXIO;
2492         }
2493
2494         /* We only support the AArch64 table format at present */
2495         switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2496         case IDR0_TTF_AARCH32_64:
2497                 smmu->ias = 40;
2498                 /* Fallthrough */
2499         case IDR0_TTF_AARCH64:
2500                 break;
2501         default:
2502                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2503                 return -ENXIO;
2504         }
2505
2506         /* ASID/VMID sizes */
2507         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2508         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2509
2510         /* IDR1 */
2511         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2512         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2513                 dev_err(smmu->dev, "embedded implementation not supported\n");
2514                 return -ENXIO;
2515         }
2516
2517         /* Queue sizes, capped at 4k */
2518         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2519                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2520         if (!smmu->cmdq.q.max_n_shift) {
2521                 /* Odd alignment restrictions on the base, so ignore for now */
2522                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2523                 return -ENXIO;
2524         }
2525
2526         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2527                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2528         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2529                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2530
2531         /* SID/SSID sizes */
2532         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2533         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2534
2535         /* IDR5 */
2536         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2537
2538         /* Maximum number of outstanding stalls */
2539         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2540                                 & IDR5_STALL_MAX_MASK;
2541
2542         /* Page sizes */
2543         if (reg & IDR5_GRAN64K)
2544                 pgsize_bitmap |= SZ_64K | SZ_512M;
2545         if (reg & IDR5_GRAN16K)
2546                 pgsize_bitmap |= SZ_16K | SZ_32M;
2547         if (reg & IDR5_GRAN4K)
2548                 pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2549
2550         arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2551
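        /*
         * Editor's note (worked example, not part of the original source):
         * an SMMU that only advertises 4K granules ends up with
         * SZ_4K | SZ_2M | SZ_1G, i.e. a pgsize_bitmap of 0x40201000, which
         * is then intersected with whatever earlier probed SMMUs sharing
         * arm_smmu_ops already allow.
         */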
2552         /* Output address size */
2553         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2554         case IDR5_OAS_32_BIT:
2555                 smmu->oas = 32;
2556                 break;
2557         case IDR5_OAS_36_BIT:
2558                 smmu->oas = 36;
2559                 break;
2560         case IDR5_OAS_40_BIT:
2561                 smmu->oas = 40;
2562                 break;
2563         case IDR5_OAS_42_BIT:
2564                 smmu->oas = 42;
2565                 break;
2566         case IDR5_OAS_44_BIT:
2567                 smmu->oas = 44;
2568                 break;
2569         default:
2570                 dev_info(smmu->dev,
2571                         "unknown output address size. Truncating to 48-bit\n");
2572                 /* Fallthrough */
2573         case IDR5_OAS_48_BIT:
2574                 smmu->oas = 48;
2575         }
2576
2577         /* Set the DMA mask for our table walker */
2578         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2579                 dev_warn(smmu->dev,
2580                          "failed to set DMA mask for table walker\n");
2581
2582         smmu->ias = max(smmu->ias, smmu->oas);
2583
2584         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2585                  smmu->ias, smmu->oas, smmu->features);
2586         return 0;
2587 }
2588
2589 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2590 {
2591         int irq, ret;
2592         struct resource *res;
2593         struct arm_smmu_device *smmu;
2594         struct device *dev = &pdev->dev;
2595
2596         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2597         if (!smmu) {
2598                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2599                 return -ENOMEM;
2600         }
2601         smmu->dev = dev;
2602
2603         /* Base address */
2604         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2605         if (!res || resource_size(res) + 1 < SZ_128K) {
2606                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2607                 return -EINVAL;
2608         }
2609
2610         smmu->base = devm_ioremap_resource(dev, res);
2611         if (IS_ERR(smmu->base))
2612                 return PTR_ERR(smmu->base);
2613
2614         /* Interrupt lines */
2615         irq = platform_get_irq_byname(pdev, "eventq");
2616         if (irq > 0)
2617                 smmu->evtq.q.irq = irq;
2618
2619         irq = platform_get_irq_byname(pdev, "priq");
2620         if (irq > 0)
2621                 smmu->priq.q.irq = irq;
2622
2623         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2624         if (irq > 0)
2625                 smmu->cmdq.q.irq = irq;
2626
2627         irq = platform_get_irq_byname(pdev, "gerror");
2628         if (irq > 0)
2629                 smmu->gerr_irq = irq;
2630
2631         parse_driver_options(smmu);
2632
2633         /* Probe the h/w */
2634         ret = arm_smmu_device_probe(smmu);
2635         if (ret)
2636                 return ret;
2637
2638         /* Initialise in-memory data structures */
2639         ret = arm_smmu_init_structures(smmu);
2640         if (ret)
2641                 return ret;
2642
2643         /* Record our private device structure */
2644         platform_set_drvdata(pdev, smmu);
2645
2646         /* Reset the device */
2647         return arm_smmu_device_reset(smmu);
2648 }
2649
2650 static int arm_smmu_device_remove(struct platform_device *pdev)
2651 {
2652         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2653
2654         arm_smmu_device_disable(smmu);
2655         return 0;
2656 }
2657
2658 static const struct of_device_id arm_smmu_of_match[] = {
2659         { .compatible = "arm,smmu-v3", },
2660         { },
2661 };
2662 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2663
2664 static struct platform_driver arm_smmu_driver = {
2665         .driver = {
2666                 .name           = "arm-smmu-v3",
2667                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2668         },
2669         .probe  = arm_smmu_device_dt_probe,
2670         .remove = arm_smmu_device_remove,
2671 };
2672
2673 static int __init arm_smmu_init(void)
2674 {
2675         struct device_node *np;
2676         int ret;
2677
2678         np = of_find_matching_node(NULL, arm_smmu_of_match);
2679         if (!np)
2680                 return 0;
2681
2682         of_node_put(np);
2683
2684         ret = platform_driver_register(&arm_smmu_driver);
2685         if (ret)
2686                 return ret;
2687
2688         return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2689 }
2690
2691 static void __exit arm_smmu_exit(void)
2692 {
2693         platform_driver_unregister(&arm_smmu_driver);
2694 }
2695
2696 subsys_initcall(arm_smmu_init);
2697 module_exit(arm_smmu_exit);
2698
2699 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2700 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2701 MODULE_LICENSE("GPL v2");