git.karo-electronics.de - karo-tx-linux.git / blob: drivers/iommu/arm-smmu-v3.c
commit: iommu/arm-smmu: Fix error checking for ASID and VMID allocation
1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/err.h>
25 #include <linux/interrupt.h>
26 #include <linux/iommu.h>
27 #include <linux/iopoll.h>
28 #include <linux/module.h>
29 #include <linux/of.h>
30 #include <linux/of_address.h>
31 #include <linux/of_platform.h>
32 #include <linux/pci.h>
33 #include <linux/platform_device.h>
34
35 #include "io-pgtable.h"
36
37 /* MMIO registers */
38 #define ARM_SMMU_IDR0                   0x0
39 #define IDR0_ST_LVL_SHIFT               27
40 #define IDR0_ST_LVL_MASK                0x3
41 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
42 #define IDR0_STALL_MODEL                (3 << 24)
43 #define IDR0_TTENDIAN_SHIFT             21
44 #define IDR0_TTENDIAN_MASK              0x3
45 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
46 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
47 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
48 #define IDR0_CD2L                       (1 << 19)
49 #define IDR0_VMID16                     (1 << 18)
50 #define IDR0_PRI                        (1 << 16)
51 #define IDR0_SEV                        (1 << 14)
52 #define IDR0_MSI                        (1 << 13)
53 #define IDR0_ASID16                     (1 << 12)
54 #define IDR0_ATS                        (1 << 10)
55 #define IDR0_HYP                        (1 << 9)
56 #define IDR0_COHACC                     (1 << 4)
57 #define IDR0_TTF_SHIFT                  2
58 #define IDR0_TTF_MASK                   0x3
59 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
60 #define IDR0_S1P                        (1 << 1)
61 #define IDR0_S2P                        (1 << 0)
62
63 #define ARM_SMMU_IDR1                   0x4
64 #define IDR1_TABLES_PRESET              (1 << 30)
65 #define IDR1_QUEUES_PRESET              (1 << 29)
66 #define IDR1_REL                        (1 << 28)
67 #define IDR1_CMDQ_SHIFT                 21
68 #define IDR1_CMDQ_MASK                  0x1f
69 #define IDR1_EVTQ_SHIFT                 16
70 #define IDR1_EVTQ_MASK                  0x1f
71 #define IDR1_PRIQ_SHIFT                 11
72 #define IDR1_PRIQ_MASK                  0x1f
73 #define IDR1_SSID_SHIFT                 6
74 #define IDR1_SSID_MASK                  0x1f
75 #define IDR1_SID_SHIFT                  0
76 #define IDR1_SID_MASK                   0x3f
77
78 #define ARM_SMMU_IDR5                   0x14
79 #define IDR5_STALL_MAX_SHIFT            16
80 #define IDR5_STALL_MAX_MASK             0xffff
81 #define IDR5_GRAN64K                    (1 << 6)
82 #define IDR5_GRAN16K                    (1 << 5)
83 #define IDR5_GRAN4K                     (1 << 4)
84 #define IDR5_OAS_SHIFT                  0
85 #define IDR5_OAS_MASK                   0x7
86 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
87 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
88 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
89 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
90 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
91 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
92
93 #define ARM_SMMU_CR0                    0x20
94 #define CR0_CMDQEN                      (1 << 3)
95 #define CR0_EVTQEN                      (1 << 2)
96 #define CR0_PRIQEN                      (1 << 1)
97 #define CR0_SMMUEN                      (1 << 0)
98
99 #define ARM_SMMU_CR0ACK                 0x24
100
101 #define ARM_SMMU_CR1                    0x28
102 #define CR1_SH_NSH                      0
103 #define CR1_SH_OSH                      2
104 #define CR1_SH_ISH                      3
105 #define CR1_CACHE_NC                    0
106 #define CR1_CACHE_WB                    1
107 #define CR1_CACHE_WT                    2
108 #define CR1_TABLE_SH_SHIFT              10
109 #define CR1_TABLE_OC_SHIFT              8
110 #define CR1_TABLE_IC_SHIFT              6
111 #define CR1_QUEUE_SH_SHIFT              4
112 #define CR1_QUEUE_OC_SHIFT              2
113 #define CR1_QUEUE_IC_SHIFT              0
114
115 #define ARM_SMMU_CR2                    0x2c
116 #define CR2_PTM                         (1 << 2)
117 #define CR2_RECINVSID                   (1 << 1)
118 #define CR2_E2H                         (1 << 0)
119
120 #define ARM_SMMU_IRQ_CTRL               0x50
121 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
122 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
123 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
124
125 #define ARM_SMMU_IRQ_CTRLACK            0x54
126
127 #define ARM_SMMU_GERROR                 0x60
128 #define GERROR_SFM_ERR                  (1 << 8)
129 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
130 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
131 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
132 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
133 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
134 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
135 #define GERROR_CMDQ_ERR                 (1 << 0)
136 #define GERROR_ERR_MASK                 0xfd
137
138 #define ARM_SMMU_GERRORN                0x64
139
140 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
141 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
142 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
143
144 #define ARM_SMMU_STRTAB_BASE            0x80
145 #define STRTAB_BASE_RA                  (1UL << 62)
146 #define STRTAB_BASE_ADDR_SHIFT          6
147 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
148
149 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
150 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
151 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
152 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
153 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
154 #define STRTAB_BASE_CFG_FMT_SHIFT       16
155 #define STRTAB_BASE_CFG_FMT_MASK        0x3
156 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
157 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
158
159 #define ARM_SMMU_CMDQ_BASE              0x90
160 #define ARM_SMMU_CMDQ_PROD              0x98
161 #define ARM_SMMU_CMDQ_CONS              0x9c
162
163 #define ARM_SMMU_EVTQ_BASE              0xa0
164 #define ARM_SMMU_EVTQ_PROD              0x100a8
165 #define ARM_SMMU_EVTQ_CONS              0x100ac
166 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
167 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
168 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
169
170 #define ARM_SMMU_PRIQ_BASE              0xc0
171 #define ARM_SMMU_PRIQ_PROD              0x100c8
172 #define ARM_SMMU_PRIQ_CONS              0x100cc
173 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
174 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
175 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
176
177 /* Common MSI config fields */
178 #define MSI_CFG0_ADDR_SHIFT             2
179 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
180 #define MSI_CFG2_SH_SHIFT               4
181 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
182 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
183 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
184 #define MSI_CFG2_MEMATTR_SHIFT          0
185 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
186
187 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
188 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
189 #define Q_OVERFLOW_FLAG                 (1 << 31)
190 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
191 #define Q_ENT(q, p)                     ((q)->base +                    \
192                                          Q_IDX(q, p) * (q)->ent_dwords)
193
194 #define Q_BASE_RWA                      (1UL << 62)
195 #define Q_BASE_ADDR_SHIFT               5
196 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
197 #define Q_BASE_LOG2SIZE_SHIFT           0
198 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
199
200 /*
201  * Stream table.
202  *
203  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
204  * 2lvl: 128k L1 entries,
205  *       256 lazy entries per table (each table covers a PCI bus)
206  */
207 #define STRTAB_L1_SZ_SHIFT              20
208 #define STRTAB_SPLIT                    8
209
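/*
 * 1 << STRTAB_L1_SZ_SHIFT bytes of 8-byte L1 descriptors corresponds to the
 * 128k L1 entries noted above; each entry covers 1 << STRTAB_SPLIT (256) STEs.
 */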
210 #define STRTAB_L1_DESC_DWORDS           1
211 #define STRTAB_L1_DESC_SPAN_SHIFT       0
212 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
213 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
214 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
215
216 #define STRTAB_STE_DWORDS               8
217 #define STRTAB_STE_0_V                  (1UL << 0)
218 #define STRTAB_STE_0_CFG_SHIFT          1
219 #define STRTAB_STE_0_CFG_MASK           0x7UL
220 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
221 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
222 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
223 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
224
225 #define STRTAB_STE_0_S1FMT_SHIFT        4
226 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
227 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
228 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
229 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
230 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
231
232 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
233 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
234 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
235 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
236 #define STRTAB_STE_1_S1C_SH_NSH         0UL
237 #define STRTAB_STE_1_S1C_SH_OSH         2UL
238 #define STRTAB_STE_1_S1C_SH_ISH         3UL
239 #define STRTAB_STE_1_S1CIR_SHIFT        2
240 #define STRTAB_STE_1_S1COR_SHIFT        4
241 #define STRTAB_STE_1_S1CSH_SHIFT        6
242
243 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
244
245 #define STRTAB_STE_1_EATS_ABT           0UL
246 #define STRTAB_STE_1_EATS_TRANS         1UL
247 #define STRTAB_STE_1_EATS_S1CHK         2UL
248 #define STRTAB_STE_1_EATS_SHIFT         28
249
250 #define STRTAB_STE_1_STRW_NSEL1         0UL
251 #define STRTAB_STE_1_STRW_EL2           2UL
252 #define STRTAB_STE_1_STRW_SHIFT         30
253
254 #define STRTAB_STE_2_S2VMID_SHIFT       0
255 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
256 #define STRTAB_STE_2_VTCR_SHIFT         32
257 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
258 #define STRTAB_STE_2_S2AA64             (1UL << 51)
259 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
260 #define STRTAB_STE_2_S2PTW              (1UL << 54)
261 #define STRTAB_STE_2_S2R                (1UL << 58)
262
263 #define STRTAB_STE_3_S2TTB_SHIFT        4
264 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
265
266 /* Context descriptor (stage-1 only) */
267 #define CTXDESC_CD_DWORDS               8
268 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
269 #define ARM64_TCR_T0SZ_SHIFT            0
270 #define ARM64_TCR_T0SZ_MASK             0x1fUL
271 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
272 #define ARM64_TCR_TG0_SHIFT             14
273 #define ARM64_TCR_TG0_MASK              0x3UL
274 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
275 #define ARM64_TCR_IRGN0_SHIFT           8
276 #define ARM64_TCR_IRGN0_MASK            0x3UL
277 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
278 #define ARM64_TCR_ORGN0_SHIFT           10
279 #define ARM64_TCR_ORGN0_MASK            0x3UL
280 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
281 #define ARM64_TCR_SH0_SHIFT             12
282 #define ARM64_TCR_SH0_MASK              0x3UL
283 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
284 #define ARM64_TCR_EPD0_SHIFT            7
285 #define ARM64_TCR_EPD0_MASK             0x1UL
286 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
287 #define ARM64_TCR_EPD1_SHIFT            23
288 #define ARM64_TCR_EPD1_MASK             0x1UL
289
290 #define CTXDESC_CD_0_ENDI               (1UL << 15)
291 #define CTXDESC_CD_0_V                  (1UL << 31)
292
293 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
294 #define ARM64_TCR_IPS_SHIFT             32
295 #define ARM64_TCR_IPS_MASK              0x7UL
296 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
297 #define ARM64_TCR_TBI0_SHIFT            37
298 #define ARM64_TCR_TBI0_MASK             0x1UL
299
300 #define CTXDESC_CD_0_AA64               (1UL << 41)
301 #define CTXDESC_CD_0_R                  (1UL << 45)
302 #define CTXDESC_CD_0_A                  (1UL << 46)
303 #define CTXDESC_CD_0_ASET_SHIFT         47
304 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
305 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
306 #define CTXDESC_CD_0_ASID_SHIFT         48
307 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
308
309 #define CTXDESC_CD_1_TTB0_SHIFT         4
310 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
311
312 #define CTXDESC_CD_3_MAIR_SHIFT         0
313
314 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
315 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
316         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
317          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
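/*
 * e.g. ARM_SMMU_TCR2CD(tcr, TG0) moves TCR.TG0 from bits [15:14] of the
 * CPU TCR to bits [7:6] of CD word 0.
 */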
318
319 /* Command queue */
320 #define CMDQ_ENT_DWORDS                 2
321 #define CMDQ_MAX_SZ_SHIFT               8
322
323 #define CMDQ_ERR_SHIFT                  24
324 #define CMDQ_ERR_MASK                   0x7f
325 #define CMDQ_ERR_CERROR_NONE_IDX        0
326 #define CMDQ_ERR_CERROR_ILL_IDX         1
327 #define CMDQ_ERR_CERROR_ABT_IDX         2
328
329 #define CMDQ_0_OP_SHIFT                 0
330 #define CMDQ_0_OP_MASK                  0xffUL
331 #define CMDQ_0_SSV                      (1UL << 11)
332
333 #define CMDQ_PREFETCH_0_SID_SHIFT       32
334 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
335 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
336
337 #define CMDQ_CFGI_0_SID_SHIFT           32
338 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
339 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
340 #define CMDQ_CFGI_1_RANGE_SHIFT         0
341 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
342
343 #define CMDQ_TLBI_0_VMID_SHIFT          32
344 #define CMDQ_TLBI_0_ASID_SHIFT          48
345 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
346 #define CMDQ_TLBI_1_ADDR_MASK           ~0xfffUL
347
348 #define CMDQ_PRI_0_SSID_SHIFT           12
349 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
350 #define CMDQ_PRI_0_SID_SHIFT            32
351 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
352 #define CMDQ_PRI_1_GRPID_SHIFT          0
353 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
354 #define CMDQ_PRI_1_RESP_SHIFT           12
355 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
356 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
357 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
358
359 #define CMDQ_SYNC_0_CS_SHIFT            12
360 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
361 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
362
363 /* Event queue */
364 #define EVTQ_ENT_DWORDS                 4
365 #define EVTQ_MAX_SZ_SHIFT               7
366
367 #define EVTQ_0_ID_SHIFT                 0
368 #define EVTQ_0_ID_MASK                  0xffUL
369
370 /* PRI queue */
371 #define PRIQ_ENT_DWORDS                 2
372 #define PRIQ_MAX_SZ_SHIFT               8
373
374 #define PRIQ_0_SID_SHIFT                0
375 #define PRIQ_0_SID_MASK                 0xffffffffUL
376 #define PRIQ_0_SSID_SHIFT               32
377 #define PRIQ_0_SSID_MASK                0xfffffUL
378 #define PRIQ_0_OF                       (1UL << 57)
379 #define PRIQ_0_PERM_PRIV                (1UL << 58)
380 #define PRIQ_0_PERM_EXEC                (1UL << 59)
381 #define PRIQ_0_PERM_READ                (1UL << 60)
382 #define PRIQ_0_PERM_WRITE               (1UL << 61)
383 #define PRIQ_0_PRG_LAST                 (1UL << 62)
384 #define PRIQ_0_SSID_V                   (1UL << 63)
385
386 #define PRIQ_1_PRG_IDX_SHIFT            0
387 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
388 #define PRIQ_1_ADDR_SHIFT               12
389 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
390
391 /* High-level queue structures */
392 #define ARM_SMMU_POLL_TIMEOUT_US        100
393
394 static bool disable_bypass;
395 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
396 MODULE_PARM_DESC(disable_bypass,
397         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
398
399 enum pri_resp {
400         PRI_RESP_DENY,
401         PRI_RESP_FAIL,
402         PRI_RESP_SUCC,
403 };
404
405 struct arm_smmu_cmdq_ent {
406         /* Common fields */
407         u8                              opcode;
408         bool                            substream_valid;
409
410         /* Command-specific fields */
411         union {
412                 #define CMDQ_OP_PREFETCH_CFG    0x1
413                 struct {
414                         u32                     sid;
415                         u8                      size;
416                         u64                     addr;
417                 } prefetch;
418
419                 #define CMDQ_OP_CFGI_STE        0x3
420                 #define CMDQ_OP_CFGI_ALL        0x4
421                 struct {
422                         u32                     sid;
423                         union {
424                                 bool            leaf;
425                                 u8              span;
426                         };
427                 } cfgi;
428
429                 #define CMDQ_OP_TLBI_NH_ASID    0x11
430                 #define CMDQ_OP_TLBI_NH_VA      0x12
431                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
432                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
433                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
434                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
435                 struct {
436                         u16                     asid;
437                         u16                     vmid;
438                         bool                    leaf;
439                         u64                     addr;
440                 } tlbi;
441
442                 #define CMDQ_OP_PRI_RESP        0x41
443                 struct {
444                         u32                     sid;
445                         u32                     ssid;
446                         u16                     grpid;
447                         enum pri_resp           resp;
448                 } pri;
449
450                 #define CMDQ_OP_CMD_SYNC        0x46
451         };
452 };
453
454 struct arm_smmu_queue {
455         int                             irq; /* Wired interrupt */
456
457         __le64                          *base;
458         dma_addr_t                      base_dma;
459         u64                             q_base;
460
461         size_t                          ent_dwords;
462         u32                             max_n_shift;
463         u32                             prod;
464         u32                             cons;
465
466         u32 __iomem                     *prod_reg;
467         u32 __iomem                     *cons_reg;
468 };
469
470 struct arm_smmu_cmdq {
471         struct arm_smmu_queue           q;
472         spinlock_t                      lock;
473 };
474
475 struct arm_smmu_evtq {
476         struct arm_smmu_queue           q;
477         u32                             max_stalls;
478 };
479
480 struct arm_smmu_priq {
481         struct arm_smmu_queue           q;
482 };
483
484 /* High-level stream table and context descriptor structures */
485 struct arm_smmu_strtab_l1_desc {
486         u8                              span;
487
488         __le64                          *l2ptr;
489         dma_addr_t                      l2ptr_dma;
490 };
491
492 struct arm_smmu_s1_cfg {
493         __le64                          *cdptr;
494         dma_addr_t                      cdptr_dma;
495
496         struct arm_smmu_ctx_desc {
497                 u16     asid;
498                 u64     ttbr;
499                 u64     tcr;
500                 u64     mair;
501         }                               cd;
502 };
503
504 struct arm_smmu_s2_cfg {
505         u16                             vmid;
506         u64                             vttbr;
507         u64                             vtcr;
508 };
509
510 struct arm_smmu_strtab_ent {
511         bool                            valid;
512
513         bool                            bypass; /* Overrides s1/s2 config */
514         struct arm_smmu_s1_cfg          *s1_cfg;
515         struct arm_smmu_s2_cfg          *s2_cfg;
516 };
517
518 struct arm_smmu_strtab_cfg {
519         __le64                          *strtab;
520         dma_addr_t                      strtab_dma;
521         struct arm_smmu_strtab_l1_desc  *l1_desc;
522         unsigned int                    num_l1_ents;
523
524         u64                             strtab_base;
525         u32                             strtab_base_cfg;
526 };
527
528 /* An SMMUv3 instance */
529 struct arm_smmu_device {
530         struct device                   *dev;
531         void __iomem                    *base;
532
533 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
534 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
535 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
536 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
537 #define ARM_SMMU_FEAT_PRI               (1 << 4)
538 #define ARM_SMMU_FEAT_ATS               (1 << 5)
539 #define ARM_SMMU_FEAT_SEV               (1 << 6)
540 #define ARM_SMMU_FEAT_MSI               (1 << 7)
541 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
542 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
543 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
544 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
545 #define ARM_SMMU_FEAT_HYP               (1 << 12)
546         u32                             features;
547
548 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
549         u32                             options;
550
551         struct arm_smmu_cmdq            cmdq;
552         struct arm_smmu_evtq            evtq;
553         struct arm_smmu_priq            priq;
554
555         int                             gerr_irq;
556
557         unsigned long                   ias; /* IPA */
558         unsigned long                   oas; /* PA */
559
560 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
561         unsigned int                    asid_bits;
562         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
563
564 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
565         unsigned int                    vmid_bits;
566         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
567
568         unsigned int                    ssid_bits;
569         unsigned int                    sid_bits;
570
571         struct arm_smmu_strtab_cfg      strtab_cfg;
572 };
573
574 /* SMMU private data for an IOMMU group */
575 struct arm_smmu_group {
576         struct arm_smmu_device          *smmu;
577         struct arm_smmu_domain          *domain;
578         int                             num_sids;
579         u32                             *sids;
580         struct arm_smmu_strtab_ent      ste;
581 };
582
583 /* SMMU private data for an IOMMU domain */
584 enum arm_smmu_domain_stage {
585         ARM_SMMU_DOMAIN_S1 = 0,
586         ARM_SMMU_DOMAIN_S2,
587         ARM_SMMU_DOMAIN_NESTED,
588 };
589
590 struct arm_smmu_domain {
591         struct arm_smmu_device          *smmu;
592         struct mutex                    init_mutex; /* Protects smmu pointer */
593
594         struct io_pgtable_ops           *pgtbl_ops;
595         spinlock_t                      pgtbl_lock;
596
597         enum arm_smmu_domain_stage      stage;
598         union {
599                 struct arm_smmu_s1_cfg  s1_cfg;
600                 struct arm_smmu_s2_cfg  s2_cfg;
601         };
602
603         struct iommu_domain             domain;
604 };
605
606 struct arm_smmu_option_prop {
607         u32 opt;
608         const char *prop;
609 };
610
611 static struct arm_smmu_option_prop arm_smmu_options[] = {
612         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
613         { 0, NULL},
614 };
615
616 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
617 {
618         return container_of(dom, struct arm_smmu_domain, domain);
619 }
620
621 static void parse_driver_options(struct arm_smmu_device *smmu)
622 {
623         int i = 0;
624
625         do {
626                 if (of_property_read_bool(smmu->dev->of_node,
627                                                 arm_smmu_options[i].prop)) {
628                         smmu->options |= arm_smmu_options[i].opt;
629                         dev_notice(smmu->dev, "option %s\n",
630                                 arm_smmu_options[i].prop);
631                 }
632         } while (arm_smmu_options[++i].opt);
633 }
634
635 /* Low-level queue manipulation functions */
636 static bool queue_full(struct arm_smmu_queue *q)
637 {
638         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
639                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
640 }
641
642 static bool queue_empty(struct arm_smmu_queue *q)
643 {
644         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
645                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
646 }
647
648 static void queue_sync_cons(struct arm_smmu_queue *q)
649 {
650         q->cons = readl_relaxed(q->cons_reg);
651 }
652
653 static void queue_inc_cons(struct arm_smmu_queue *q)
654 {
655         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
656
657         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
658         writel(q->cons, q->cons_reg);
659 }
660
661 static int queue_sync_prod(struct arm_smmu_queue *q)
662 {
663         int ret = 0;
664         u32 prod = readl_relaxed(q->prod_reg);
665
666         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
667                 ret = -EOVERFLOW;
668
669         q->prod = prod;
670         return ret;
671 }
672
673 static void queue_inc_prod(struct arm_smmu_queue *q)
674 {
675         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
676
677         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
678         writel(q->prod, q->prod_reg);
679 }
680
681 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
682 {
683         if (Q_WRP(q, q->cons) == Q_WRP(q, until))
684                 return Q_IDX(q, q->cons) < Q_IDX(q, until);
685
686         return Q_IDX(q, q->cons) >= Q_IDX(q, until);
687 }
688
689 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
690 {
691         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
692
693         while (queue_sync_cons(q), __queue_cons_before(q, until)) {
694                 if (ktime_compare(ktime_get(), timeout) > 0)
695                         return -ETIMEDOUT;
696
697                 if (wfe) {
698                         wfe();
699                 } else {
700                         cpu_relax();
701                         udelay(1);
702                 }
703         }
704
705         return 0;
706 }
707
708 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
709 {
710         int i;
711
712         for (i = 0; i < n_dwords; ++i)
713                 *dst++ = cpu_to_le64(*src++);
714 }
715
716 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
717 {
718         if (queue_full(q))
719                 return -ENOSPC;
720
721         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
722         queue_inc_prod(q);
723         return 0;
724 }
725
726 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
727 {
728         int i;
729
730         for (i = 0; i < n_dwords; ++i)
731                 *dst++ = le64_to_cpu(*src++);
732 }
733
734 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
735 {
736         if (queue_empty(q))
737                 return -EAGAIN;
738
739         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
740         queue_inc_cons(q);
741         return 0;
742 }
743
744 /* High-level queue accessors */
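/* Build the two 64-bit words of a command queue entry from a command descriptor */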
745 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
746 {
747         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
748         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
749
750         switch (ent->opcode) {
751         case CMDQ_OP_TLBI_EL2_ALL:
752         case CMDQ_OP_TLBI_NSNH_ALL:
753                 break;
754         case CMDQ_OP_PREFETCH_CFG:
755                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
756                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
757                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
758                 break;
759         case CMDQ_OP_CFGI_STE:
760                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
761                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
762                 break;
763         case CMDQ_OP_CFGI_ALL:
764                 /* Cover the entire SID range */
765                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
766                 break;
767         case CMDQ_OP_TLBI_NH_VA:
768                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
769                 /* Fallthrough */
770         case CMDQ_OP_TLBI_S2_IPA:
771                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
772                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
773                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_ADDR_MASK;
774                 break;
775         case CMDQ_OP_TLBI_NH_ASID:
776                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
777                 /* Fallthrough */
778         case CMDQ_OP_TLBI_S12_VMALL:
779                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
780                 break;
781         case CMDQ_OP_PRI_RESP:
782                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
783                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
784                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
785                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
786                 switch (ent->pri.resp) {
787                 case PRI_RESP_DENY:
788                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
789                         break;
790                 case PRI_RESP_FAIL:
791                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
792                         break;
793                 case PRI_RESP_SUCC:
794                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
795                         break;
796                 default:
797                         return -EINVAL;
798                 }
799                 break;
800         case CMDQ_OP_CMD_SYNC:
801                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
802                 break;
803         default:
804                 return -ENOENT;
805         }
806
807         return 0;
808 }
809
810 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
811 {
812         static const char *cerror_str[] = {
813                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
814                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
815                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
816         };
817
818         int i;
819         u64 cmd[CMDQ_ENT_DWORDS];
820         struct arm_smmu_queue *q = &smmu->cmdq.q;
821         u32 cons = readl_relaxed(q->cons_reg);
822         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
823         struct arm_smmu_cmdq_ent cmd_sync = {
824                 .opcode = CMDQ_OP_CMD_SYNC,
825         };
826
827         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
828                 cerror_str[idx]);
829
830         switch (idx) {
831         case CMDQ_ERR_CERROR_ILL_IDX:
832                 break;
833         case CMDQ_ERR_CERROR_ABT_IDX:
834                 dev_err(smmu->dev, "retrying command fetch\n");
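                /* Fall through */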
835         case CMDQ_ERR_CERROR_NONE_IDX:
836                 return;
837         }
838
839         /*
840          * We may have concurrent producers, so we need to be careful
841          * not to touch any of the shadow cmdq state.
842          */
843         queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
844         dev_err(smmu->dev, "skipping command in error state:\n");
845         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
846                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
847
848         /* Convert the erroneous command into a CMD_SYNC */
849         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
850                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
851                 return;
852         }
853
854         queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
855 }
856
857 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
858                                     struct arm_smmu_cmdq_ent *ent)
859 {
860         u32 until;
861         u64 cmd[CMDQ_ENT_DWORDS];
862         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
863         struct arm_smmu_queue *q = &smmu->cmdq.q;
864
865         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
866                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
867                          ent->opcode);
868                 return;
869         }
870
871         spin_lock(&smmu->cmdq.lock);
872         while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
873                 /*
874                  * Keep the queue locked, otherwise the producer could wrap
875                  * twice and we could see a future consumer pointer that looks
876                  * like it's behind us.
877                  */
878                 if (queue_poll_cons(q, until, wfe))
879                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
880         }
881
882         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
883                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
884         spin_unlock(&smmu->cmdq.lock);
885 }
886
887 /* Context descriptor manipulation functions */
888 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
889 {
890         u64 val = 0;
891
892         /* Repack the TCR. Just care about TTBR0 for now */
893         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
894         val |= ARM_SMMU_TCR2CD(tcr, TG0);
895         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
896         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
897         val |= ARM_SMMU_TCR2CD(tcr, SH0);
898         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
899         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
900         val |= ARM_SMMU_TCR2CD(tcr, IPS);
901         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
902
903         return val;
904 }
905
906 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
907                                     struct arm_smmu_s1_cfg *cfg)
908 {
909         u64 val;
910
911         /*
912          * We don't need to issue any invalidation here, as we'll invalidate
913          * the STE when installing the new entry anyway.
914          */
915         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
916 #ifdef __BIG_ENDIAN
917               CTXDESC_CD_0_ENDI |
918 #endif
919               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
920               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
921               CTXDESC_CD_0_V;
922         cfg->cdptr[0] = cpu_to_le64(val);
923
924         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
925         cfg->cdptr[1] = cpu_to_le64(val);
926
927         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
928 }
929
930 /* Stream table manipulation functions */
931 static void
932 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
933 {
934         u64 val = 0;
935
936         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
937                 << STRTAB_L1_DESC_SPAN_SHIFT;
938         val |= desc->l2ptr_dma &
939                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
940
941         *dst = cpu_to_le64(val);
942 }
943
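/*
 * Invalidate any copy of this STE that the SMMU may have cached for this
 * SID, then issue a CMD_SYNC to wait for the invalidation to complete.
 */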
944 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
945 {
946         struct arm_smmu_cmdq_ent cmd = {
947                 .opcode = CMDQ_OP_CFGI_STE,
948                 .cfgi   = {
949                         .sid    = sid,
950                         .leaf   = true,
951                 },
952         };
953
954         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
955         cmd.opcode = CMDQ_OP_CMD_SYNC;
956         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
957 }
958
959 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
960                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
961 {
962         /*
963          * This is hideously complicated, but we only really care about
964          * three cases at the moment:
965          *
966          * 1. Invalid (all zero) -> bypass  (init)
967          * 2. Bypass -> translation (attach)
968          * 3. Translation -> bypass (detach)
969          *
970          * Given that we can't update the STE atomically and the SMMU
971          * doesn't read the thing in a defined order, that leaves us
972          * with the following maintenance requirements:
973          *
974          * 1. Update Config, return (init time STEs aren't live)
975          * 2. Write everything apart from dword 0, sync, write dword 0, sync
976          * 3. Update Config, sync
977          */
978         u64 val = le64_to_cpu(dst[0]);
979         bool ste_live = false;
980         struct arm_smmu_cmdq_ent prefetch_cmd = {
981                 .opcode         = CMDQ_OP_PREFETCH_CFG,
982                 .prefetch       = {
983                         .sid    = sid,
984                 },
985         };
986
987         if (val & STRTAB_STE_0_V) {
988                 u64 cfg;
989
990                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
991                 switch (cfg) {
992                 case STRTAB_STE_0_CFG_BYPASS:
993                         break;
994                 case STRTAB_STE_0_CFG_S1_TRANS:
995                 case STRTAB_STE_0_CFG_S2_TRANS:
996                         ste_live = true;
997                         break;
998                 default:
999                         BUG(); /* STE corruption */
1000                 }
1001         }
1002
1003         /* Nuke the existing Config, as we're going to rewrite it */
1004         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1005
1006         if (ste->valid)
1007                 val |= STRTAB_STE_0_V;
1008         else
1009                 val &= ~STRTAB_STE_0_V;
1010
1011         if (ste->bypass) {
1012                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1013                                       : STRTAB_STE_0_CFG_BYPASS;
1014                 dst[0] = cpu_to_le64(val);
1015                 dst[2] = 0; /* Nuke the VMID */
1016                 if (ste_live)
1017                         arm_smmu_sync_ste_for_sid(smmu, sid);
1018                 return;
1019         }
1020
1021         if (ste->s1_cfg) {
1022                 BUG_ON(ste_live);
1023                 dst[1] = cpu_to_le64(
1024                          STRTAB_STE_1_S1C_CACHE_WBRA
1025                          << STRTAB_STE_1_S1CIR_SHIFT |
1026                          STRTAB_STE_1_S1C_CACHE_WBRA
1027                          << STRTAB_STE_1_S1COR_SHIFT |
1028                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1029                          STRTAB_STE_1_S1STALLD |
1030 #ifdef CONFIG_PCI_ATS
1031                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1032 #endif
1033                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1034
1035                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1036                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1037                         STRTAB_STE_0_CFG_S1_TRANS;
1038
1039         }
1040
1041         if (ste->s2_cfg) {
1042                 BUG_ON(ste_live);
1043                 dst[2] = cpu_to_le64(
1044                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1045                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1046                           << STRTAB_STE_2_VTCR_SHIFT |
1047 #ifdef __BIG_ENDIAN
1048                          STRTAB_STE_2_S2ENDI |
1049 #endif
1050                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1051                          STRTAB_STE_2_S2R);
1052
1053                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1054                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1055
1056                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1057         }
1058
1059         arm_smmu_sync_ste_for_sid(smmu, sid);
1060         dst[0] = cpu_to_le64(val);
1061         arm_smmu_sync_ste_for_sid(smmu, sid);
1062
1063         /* It's likely that we'll want to use the new STE soon */
1064         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1065                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1066 }
1067
1068 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1069 {
1070         unsigned int i;
1071         struct arm_smmu_strtab_ent ste = {
1072                 .valid  = true,
1073                 .bypass = true,
1074         };
1075
1076         for (i = 0; i < nent; ++i) {
1077                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1078                 strtab += STRTAB_STE_DWORDS;
1079         }
1080 }
1081
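/*
 * Lazily allocate the level-2 stream table covering this SID's group of
 * 1 << STRTAB_SPLIT stream IDs, initialise its STEs (bypass, or abort if
 * disable_bypass is set) and install it in the matching L1 descriptor.
 */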
1082 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1083 {
1084         size_t size;
1085         void *strtab;
1086         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1087         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1088
1089         if (desc->l2ptr)
1090                 return 0;
1091
1092         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1093         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1094
1095         desc->span = STRTAB_SPLIT + 1;
1096         desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1097                                           GFP_KERNEL);
1098         if (!desc->l2ptr) {
1099                 dev_err(smmu->dev,
1100                         "failed to allocate l2 stream table for SID %u\n",
1101                         sid);
1102                 return -ENOMEM;
1103         }
1104
1105         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1106         arm_smmu_write_strtab_l1_desc(strtab, desc);
1107         return 0;
1108 }
1109
1110 /* IRQ and event handlers */
1111 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1112 {
1113         int i;
1114         struct arm_smmu_device *smmu = dev;
1115         struct arm_smmu_queue *q = &smmu->evtq.q;
1116         u64 evt[EVTQ_ENT_DWORDS];
1117
1118         while (!queue_remove_raw(q, evt)) {
1119                 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1120
1121                 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1122                 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1123                         dev_info(smmu->dev, "\t0x%016llx\n",
1124                                  (unsigned long long)evt[i]);
1125         }
1126
1127         /* Sync our overflow flag, as we believe we're up to speed */
1128         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1129         return IRQ_HANDLED;
1130 }
1131
1132 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1133 {
1134         irqreturn_t ret = IRQ_WAKE_THREAD;
1135         struct arm_smmu_device *smmu = dev;
1136         struct arm_smmu_queue *q = &smmu->evtq.q;
1137
1138         /*
1139          * Not much we can do on overflow, so scream and pretend we're
1140          * trying harder.
1141          */
1142         if (queue_sync_prod(q) == -EOVERFLOW)
1143                 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1144         else if (queue_empty(q))
1145                 ret = IRQ_NONE;
1146
1147         return ret;
1148 }
1149
1150 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1151 {
1152         struct arm_smmu_device *smmu = dev;
1153         struct arm_smmu_queue *q = &smmu->priq.q;
1154         u64 evt[PRIQ_ENT_DWORDS];
1155
1156         while (!queue_remove_raw(q, evt)) {
1157                 u32 sid, ssid;
1158                 u16 grpid;
1159                 bool ssv, last;
1160
1161                 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1162                 ssv = evt[0] & PRIQ_0_SSID_V;
1163                 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1164                 last = evt[0] & PRIQ_0_PRG_LAST;
1165                 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1166
1167                 dev_info(smmu->dev, "unexpected PRI request received:\n");
1168                 dev_info(smmu->dev,
1169                          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1170                          sid, ssid, grpid, last ? "L" : "",
1171                          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1172                          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1173                          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1174                          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1175                          evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1176
1177                 if (last) {
1178                         struct arm_smmu_cmdq_ent cmd = {
1179                                 .opcode                 = CMDQ_OP_PRI_RESP,
1180                                 .substream_valid        = ssv,
1181                                 .pri                    = {
1182                                         .sid    = sid,
1183                                         .ssid   = ssid,
1184                                         .grpid  = grpid,
1185                                         .resp   = PRI_RESP_DENY,
1186                                 },
1187                         };
1188
1189                         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1190                 }
1191         }
1192
1193         /* Sync our overflow flag, as we believe we're up to speed */
1194         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1195         return IRQ_HANDLED;
1196 }
1197
1198 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1199 {
1200         irqreturn_t ret = IRQ_WAKE_THREAD;
1201         struct arm_smmu_device *smmu = dev;
1202         struct arm_smmu_queue *q = &smmu->priq.q;
1203
1204         /* PRIQ overflow indicates a programming error */
1205         if (queue_sync_prod(q) == -EOVERFLOW)
1206                 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1207         else if (queue_empty(q))
1208                 ret = IRQ_NONE;
1209
1210         return ret;
1211 }
1212
1213 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1214 {
1215         /* We don't actually use CMD_SYNC interrupts for anything */
1216         return IRQ_HANDLED;
1217 }
1218
1219 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1220
1221 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1222 {
1223         u32 gerror, gerrorn;
1224         struct arm_smmu_device *smmu = dev;
1225
1226         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1227         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1228
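        /* A global error is active when its GERROR bit differs from GERRORN */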
1229         gerror ^= gerrorn;
1230         if (!(gerror & GERROR_ERR_MASK))
1231                 return IRQ_NONE; /* No errors pending */
1232
1233         dev_warn(smmu->dev,
1234                  "unexpected global error reported (0x%08x), this could be serious\n",
1235                  gerror);
1236
1237         if (gerror & GERROR_SFM_ERR) {
1238                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1239                 arm_smmu_device_disable(smmu);
1240         }
1241
1242         if (gerror & GERROR_MSI_GERROR_ABT_ERR)
1243                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1244
1245         if (gerror & GERROR_MSI_PRIQ_ABT_ERR) {
1246                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1247                 arm_smmu_priq_handler(irq, smmu);
1248         }
1249
1250         if (gerror & GERROR_MSI_EVTQ_ABT_ERR) {
1251                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1252                 arm_smmu_evtq_handler(irq, smmu);
1253         }
1254
1255         if (gerror & GERROR_MSI_CMDQ_ABT_ERR) {
1256                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1257                 arm_smmu_cmdq_sync_handler(irq, smmu);
1258         }
1259
1260         if (gerror & GERROR_PRIQ_ABT_ERR)
1261                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1262
1263         if (gerror & GERROR_EVTQ_ABT_ERR)
1264                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1265
1266         if (gerror & GERROR_CMDQ_ERR)
1267                 arm_smmu_cmdq_skip_err(smmu);
1268
1269         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1270         return IRQ_HANDLED;
1271 }
1272
1273 /* IO_PGTABLE API */
1274 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1275 {
1276         struct arm_smmu_cmdq_ent cmd;
1277
1278         cmd.opcode = CMDQ_OP_CMD_SYNC;
1279         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1280 }
1281
1282 static void arm_smmu_tlb_sync(void *cookie)
1283 {
1284         struct arm_smmu_domain *smmu_domain = cookie;
1285         __arm_smmu_tlb_sync(smmu_domain->smmu);
1286 }
1287
1288 static void arm_smmu_tlb_inv_context(void *cookie)
1289 {
1290         struct arm_smmu_domain *smmu_domain = cookie;
1291         struct arm_smmu_device *smmu = smmu_domain->smmu;
1292         struct arm_smmu_cmdq_ent cmd;
1293
1294         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1295                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1296                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1297                 cmd.tlbi.vmid   = 0;
1298         } else {
1299                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1300                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1301         }
1302
1303         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1304         __arm_smmu_tlb_sync(smmu);
1305 }
1306
1307 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1308                                           bool leaf, void *cookie)
1309 {
1310         struct arm_smmu_domain *smmu_domain = cookie;
1311         struct arm_smmu_device *smmu = smmu_domain->smmu;
1312         struct arm_smmu_cmdq_ent cmd = {
1313                 .tlbi = {
1314                         .leaf   = leaf,
1315                         .addr   = iova,
1316                 },
1317         };
1318
1319         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1320                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1321                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1322         } else {
1323                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1324                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1325         }
1326
1327         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1328 }
1329
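/*
 * TLB management callbacks for the io-pgtable code, implemented using SMMU
 * invalidation commands on the command queue.
 */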
1330 static struct iommu_gather_ops arm_smmu_gather_ops = {
1331         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1332         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1333         .tlb_sync       = arm_smmu_tlb_sync,
1334 };
1335
1336 /* IOMMU API */
1337 static bool arm_smmu_capable(enum iommu_cap cap)
1338 {
1339         switch (cap) {
1340         case IOMMU_CAP_CACHE_COHERENCY:
1341                 return true;
1342         case IOMMU_CAP_INTR_REMAP:
1343                 return true; /* MSIs are just memory writes */
1344         case IOMMU_CAP_NOEXEC:
1345                 return true;
1346         default:
1347                 return false;
1348         }
1349 }
1350
1351 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1352 {
1353         struct arm_smmu_domain *smmu_domain;
1354
1355         if (type != IOMMU_DOMAIN_UNMANAGED)
1356                 return NULL;
1357
1358         /*
1359          * Allocate the domain and initialise some of its data structures.
1360          * We can't really do anything meaningful until we've added a
1361          * master.
1362          */
1363         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1364         if (!smmu_domain)
1365                 return NULL;
1366
1367         mutex_init(&smmu_domain->init_mutex);
1368         spin_lock_init(&smmu_domain->pgtbl_lock);
1369         return &smmu_domain->domain;
1370 }
1371
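/*
 * Claim a free index from the bitmap. find_first_zero_bit() and
 * test_and_set_bit() can race with a concurrent allocation, so retry until
 * a bit is claimed or the map is exhausted (-ENOSPC).
 */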
1372 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1373 {
1374         int idx, size = 1 << span;
1375
1376         do {
1377                 idx = find_first_zero_bit(map, size);
1378                 if (idx == size)
1379                         return -ENOSPC;
1380         } while (test_and_set_bit(idx, map));
1381
1382         return idx;
1383 }
1384
1385 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1386 {
1387         clear_bit(idx, map);
1388 }
1389
1390 static void arm_smmu_domain_free(struct iommu_domain *domain)
1391 {
1392         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1393         struct arm_smmu_device *smmu = smmu_domain->smmu;
1394
1395         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1396
1397         /* Free the CD and ASID, if we allocated them */
1398         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1399                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1400
1401                 if (cfg->cdptr) {
1402                         dma_free_coherent(smmu_domain->smmu->dev,
1403                                           CTXDESC_CD_DWORDS << 3,
1404                                           cfg->cdptr,
1405                                           cfg->cdptr_dma);
1406
1407                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1408                 }
1409         } else {
1410                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1411                 if (cfg->vmid)
1412                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1413         }
1414
1415         kfree(smmu_domain);
1416 }
1417
1418 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1419                                        struct io_pgtable_cfg *pgtbl_cfg)
1420 {
1421         int ret;
1422         int asid;
1423         struct arm_smmu_device *smmu = smmu_domain->smmu;
1424         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1425
1426         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1427         if (asid < 0)
1428                 return asid;
1429
1430         cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1431                                          &cfg->cdptr_dma, GFP_KERNEL);
1432         if (!cfg->cdptr) {
1433                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1434                 ret = -ENOMEM;
1435                 goto out_free_asid;
1436         }
1437
1438         cfg->cd.asid    = (u16)asid;
1439         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1440         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1441         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1442         return 0;
1443
1444 out_free_asid:
1445         arm_smmu_bitmap_free(smmu->asid_map, asid);
1446         return ret;
1447 }
1448
1449 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1450                                        struct io_pgtable_cfg *pgtbl_cfg)
1451 {
1452         int vmid;
1453         struct arm_smmu_device *smmu = smmu_domain->smmu;
1454         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1455
1456         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1457         if (vmid < 0)
1458                 return vmid;
1459
1460         cfg->vmid       = (u16)vmid;
1461         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1462         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1463         return 0;
1464 }
1465
1466 static struct iommu_ops arm_smmu_ops;
1467
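/*
 * Finalise a domain on first attach: clamp the requested stage to what the
 * hardware supports, build an io-pgtable configuration for that stage and
 * allocate the page tables, then hand over to the stage-specific helper
 * above to set up the ASID/context descriptor (stage 1) or VMID (stage 2).
 */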
1468 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1469 {
1470         int ret;
1471         unsigned long ias, oas;
1472         enum io_pgtable_fmt fmt;
1473         struct io_pgtable_cfg pgtbl_cfg;
1474         struct io_pgtable_ops *pgtbl_ops;
1475         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1476                                  struct io_pgtable_cfg *);
1477         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1478         struct arm_smmu_device *smmu = smmu_domain->smmu;
1479
1480         /* Restrict the stage to what we can actually support */
1481         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1482                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1483         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1484                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1485
1486         switch (smmu_domain->stage) {
1487         case ARM_SMMU_DOMAIN_S1:
1488                 ias = VA_BITS;
1489                 oas = smmu->ias;
1490                 fmt = ARM_64_LPAE_S1;
1491                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1492                 break;
1493         case ARM_SMMU_DOMAIN_NESTED:
1494         case ARM_SMMU_DOMAIN_S2:
1495                 ias = smmu->ias;
1496                 oas = smmu->oas;
1497                 fmt = ARM_64_LPAE_S2;
1498                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1499                 break;
1500         default:
1501                 return -EINVAL;
1502         }
1503
1504         pgtbl_cfg = (struct io_pgtable_cfg) {
1505                 .pgsize_bitmap  = arm_smmu_ops.pgsize_bitmap,
1506                 .ias            = ias,
1507                 .oas            = oas,
1508                 .tlb            = &arm_smmu_gather_ops,
1509                 .iommu_dev      = smmu->dev,
1510         };
1511
1512         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1513         if (!pgtbl_ops)
1514                 return -ENOMEM;
1515
1516         arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1517         smmu_domain->pgtbl_ops = pgtbl_ops;
1518
1519         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1520         if (IS_ERR_VALUE(ret))
1521                 free_io_pgtable_ops(pgtbl_ops);
1522
1523         return ret;
1524 }
1525
1526 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1527 {
1528         struct iommu_group *group;
1529         struct arm_smmu_group *smmu_group;
1530
1531         group = iommu_group_get(dev);
1532         if (!group)
1533                 return NULL;
1534
1535         smmu_group = iommu_group_get_iommudata(group);
1536         iommu_group_put(group);
1537         return smmu_group;
1538 }
1539
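/*
 * Find the STE for a given StreamID. With a two-level stream table, the
 * upper SID bits select an L1 descriptor and the low STRTAB_SPLIT bits
 * index that descriptor's L2 array of STEs (e.g. with an 8-bit split,
 * SID 0x321 would use L1 entry 0x3, L2 entry 0x21). A linear table is
 * simply indexed by the SID itself.
 */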
1540 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1541 {
1542         __le64 *step;
1543         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1544
1545         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1546                 struct arm_smmu_strtab_l1_desc *l1_desc;
1547                 int idx;
1548
1549                 /* Two-level walk */
1550                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1551                 l1_desc = &cfg->l1_desc[idx];
1552                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1553                 step = &l1_desc->l2ptr[idx];
1554         } else {
1555                 /* Simple linear lookup */
1556                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1557         }
1558
1559         return step;
1560 }
1561
1562 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1563 {
1564         int i;
1565         struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1566         struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1567         struct arm_smmu_device *smmu = smmu_group->smmu;
1568
1569         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1570                 ste->s1_cfg = &smmu_domain->s1_cfg;
1571                 ste->s2_cfg = NULL;
1572                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1573         } else {
1574                 ste->s1_cfg = NULL;
1575                 ste->s2_cfg = &smmu_domain->s2_cfg;
1576         }
1577
1578         for (i = 0; i < smmu_group->num_sids; ++i) {
1579                 u32 sid = smmu_group->sids[i];
1580                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1581
1582                 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1583         }
1584
1585         return 0;
1586 }
1587
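/*
 * Attaching the first device finalises the domain against that device's
 * SMMU; later attaches must use the same SMMU instance. Once the domain
 * is initialised, the group's STEs are rewritten to point at it (clearing
 * bypass).
 */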
1588 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1589 {
1590         int ret = 0;
1591         struct arm_smmu_device *smmu;
1592         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1593         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1594
1595         if (!smmu_group)
1596                 return -ENOENT;
1597
1598         /* Already attached to a different domain? */
1599         if (smmu_group->domain && smmu_group->domain != smmu_domain)
1600                 return -EEXIST;
1601
1602         smmu = smmu_group->smmu;
1603         mutex_lock(&smmu_domain->init_mutex);
1604
1605         if (!smmu_domain->smmu) {
1606                 smmu_domain->smmu = smmu;
1607                 ret = arm_smmu_domain_finalise(domain);
1608                 if (ret) {
1609                         smmu_domain->smmu = NULL;
1610                         goto out_unlock;
1611                 }
1612         } else if (smmu_domain->smmu != smmu) {
1613                 dev_err(dev,
1614                         "cannot attach to SMMU %s (upstream of %s)\n",
1615                         dev_name(smmu_domain->smmu->dev),
1616                         dev_name(smmu->dev));
1617                 ret = -ENXIO;
1618                 goto out_unlock;
1619         }
1620
1621         /* Group already attached to this domain? */
1622         if (smmu_group->domain)
1623                 goto out_unlock;
1624
1625         smmu_group->domain      = smmu_domain;
1626         smmu_group->ste.bypass  = false;
1627
1628         ret = arm_smmu_install_ste_for_group(smmu_group);
1629         if (IS_ERR_VALUE(ret))
1630                 smmu_group->domain = NULL;
1631
1632 out_unlock:
1633         mutex_unlock(&smmu_domain->init_mutex);
1634         return ret;
1635 }
1636
1637 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1638 {
1639         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1640         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1641
1642         BUG_ON(!smmu_domain);
1643         BUG_ON(!smmu_group);
1644
1645         mutex_lock(&smmu_domain->init_mutex);
1646         BUG_ON(smmu_group->domain != smmu_domain);
1647
1648         smmu_group->ste.bypass = true;
1649         if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1650                 dev_warn(dev, "failed to install bypass STE\n");
1651
1652         smmu_group->domain = NULL;
1653         mutex_unlock(&smmu_domain->init_mutex);
1654 }
1655
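/*
 * map/unmap/iova_to_phys simply proxy to the domain's io-pgtable ops,
 * serialised by pgtbl_lock.
 */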
1656 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1657                         phys_addr_t paddr, size_t size, int prot)
1658 {
1659         int ret;
1660         unsigned long flags;
1661         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1662         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1663
1664         if (!ops)
1665                 return -ENODEV;
1666
1667         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1668         ret = ops->map(ops, iova, paddr, size, prot);
1669         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1670         return ret;
1671 }
1672
1673 static size_t
1674 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1675 {
1676         size_t ret;
1677         unsigned long flags;
1678         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1679         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1680
1681         if (!ops)
1682                 return 0;
1683
1684         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1685         ret = ops->unmap(ops, iova, size);
1686         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1687         return ret;
1688 }
1689
1690 static phys_addr_t
1691 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1692 {
1693         phys_addr_t ret;
1694         unsigned long flags;
1695         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1696         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1697
1698         if (!ops)
1699                 return 0;
1700
1701         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1702         ret = ops->iova_to_phys(ops, iova);
1703         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1704
1705         return ret;
1706 }
1707
1708 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1709 {
1710         *(u32 *)sidp = alias;
1711         return 0; /* Continue walking */
1712 }
1713
1714 static void __arm_smmu_release_pci_iommudata(void *data)
1715 {
1716         kfree(data);
1717 }
1718
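/*
 * For a PCI master, locate the SMMU by walking up to the root bus and
 * following the host controller's "iommus" phandle to the SMMU's
 * platform device.
 */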
1719 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1720 {
1721         struct device_node *of_node;
1722         struct platform_device *smmu_pdev;
1723         struct arm_smmu_device *smmu = NULL;
1724         struct pci_bus *bus = pdev->bus;
1725
1726         /* Walk up to the root bus */
1727         while (!pci_is_root_bus(bus))
1728                 bus = bus->parent;
1729
1730         /* Follow the "iommus" phandle from the host controller */
1731         of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1732         if (!of_node)
1733                 return NULL;
1734
1735         /* See if we can find an SMMU corresponding to the phandle */
1736         smmu_pdev = of_find_device_by_node(of_node);
1737         if (smmu_pdev)
1738                 smmu = platform_get_drvdata(smmu_pdev);
1739
1740         of_node_put(of_node);
1741         return smmu;
1742 }
1743
1744 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1745 {
1746         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1747
1748         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1749                 limit *= 1UL << STRTAB_SPLIT;
1750
1751         return sid < limit;
1752 }
1753
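/*
 * Hook a new (PCI-only, for now) device up to its group: allocate the
 * per-group data on first use, derive the SID from the DMA alias (RID),
 * check it against the stream table limits, make sure the relevant L2
 * stream table exists and record the SID in the group's array.
 */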
1754 static int arm_smmu_add_device(struct device *dev)
1755 {
1756         int i, ret;
1757         u32 sid, *sids;
1758         struct pci_dev *pdev;
1759         struct iommu_group *group;
1760         struct arm_smmu_group *smmu_group;
1761         struct arm_smmu_device *smmu;
1762
1763         /* We only support PCI, for now */
1764         if (!dev_is_pci(dev))
1765                 return -ENODEV;
1766
1767         pdev = to_pci_dev(dev);
1768         group = iommu_group_get_for_dev(dev);
1769         if (IS_ERR(group))
1770                 return PTR_ERR(group);
1771
1772         smmu_group = iommu_group_get_iommudata(group);
1773         if (!smmu_group) {
1774                 smmu = arm_smmu_get_for_pci_dev(pdev);
1775                 if (!smmu) {
1776                         ret = -ENOENT;
1777                         goto out_put_group;
1778                 }
1779
1780                 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1781                 if (!smmu_group) {
1782                         ret = -ENOMEM;
1783                         goto out_put_group;
1784                 }
1785
1786                 smmu_group->ste.valid   = true;
1787                 smmu_group->smmu        = smmu;
1788                 iommu_group_set_iommudata(group, smmu_group,
1789                                           __arm_smmu_release_pci_iommudata);
1790         } else {
1791                 smmu = smmu_group->smmu;
1792         }
1793
1794         /* Assume SID == RID until firmware tells us otherwise */
1795         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1796         for (i = 0; i < smmu_group->num_sids; ++i) {
1797                 /* If we already know about this SID, then we're done */
1798                 if (smmu_group->sids[i] == sid)
1799                         return 0;
1800         }
1801
1802         /* Check the SID is in range of the SMMU and our stream table */
1803         if (!arm_smmu_sid_in_range(smmu, sid)) {
1804                 ret = -ERANGE;
1805                 goto out_put_group;
1806         }
1807
1808         /* Ensure l2 strtab is initialised */
1809         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1810                 ret = arm_smmu_init_l2_strtab(smmu, sid);
1811                 if (ret)
1812                         goto out_put_group;
1813         }
1814
1815         /* Resize the SID array for the group */
1816         smmu_group->num_sids++;
1817         sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1818                         GFP_KERNEL);
1819         if (!sids) {
1820                 smmu_group->num_sids--;
1821                 ret = -ENOMEM;
1822                 goto out_put_group;
1823         }
1824
1825         /* Add the new SID */
1826         sids[smmu_group->num_sids - 1] = sid;
1827         smmu_group->sids = sids;
1828         return 0;
1829
1830 out_put_group:
1831         iommu_group_put(group);
1832         return ret;
1833 }
1834
1835 static void arm_smmu_remove_device(struct device *dev)
1836 {
1837         iommu_group_remove_device(dev);
1838 }
1839
1840 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1841                                     enum iommu_attr attr, void *data)
1842 {
1843         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1844
1845         switch (attr) {
1846         case DOMAIN_ATTR_NESTING:
1847                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1848                 return 0;
1849         default:
1850                 return -ENODEV;
1851         }
1852 }
1853
1854 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1855                                     enum iommu_attr attr, void *data)
1856 {
1857         int ret = 0;
1858         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1859
1860         mutex_lock(&smmu_domain->init_mutex);
1861
1862         switch (attr) {
1863         case DOMAIN_ATTR_NESTING:
1864                 if (smmu_domain->smmu) {
1865                         ret = -EPERM;
1866                         goto out_unlock;
1867                 }
1868
1869                 if (*(int *)data)
1870                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1871                 else
1872                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1873
1874                 break;
1875         default:
1876                 ret = -ENODEV;
1877         }
1878
1879 out_unlock:
1880         mutex_unlock(&smmu_domain->init_mutex);
1881         return ret;
1882 }
1883
1884 static struct iommu_ops arm_smmu_ops = {
1885         .capable                = arm_smmu_capable,
1886         .domain_alloc           = arm_smmu_domain_alloc,
1887         .domain_free            = arm_smmu_domain_free,
1888         .attach_dev             = arm_smmu_attach_dev,
1889         .detach_dev             = arm_smmu_detach_dev,
1890         .map                    = arm_smmu_map,
1891         .unmap                  = arm_smmu_unmap,
1892         .iova_to_phys           = arm_smmu_iova_to_phys,
1893         .add_device             = arm_smmu_add_device,
1894         .remove_device          = arm_smmu_remove_device,
1895         .domain_get_attr        = arm_smmu_domain_get_attr,
1896         .domain_set_attr        = arm_smmu_domain_set_attr,
1897         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1898 };
1899
1900 /* Probing and initialisation functions */
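/*
 * A queue occupies (1 << max_n_shift) entries of 'dwords' 64-bit words in
 * DMA-coherent memory; q_base packs the base address and log2 size into
 * the format expected by the hardware *_BASE register.
 */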
1901 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1902                                    struct arm_smmu_queue *q,
1903                                    unsigned long prod_off,
1904                                    unsigned long cons_off,
1905                                    size_t dwords)
1906 {
1907         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1908
1909         q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1910         if (!q->base) {
1911                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1912                         qsz);
1913                 return -ENOMEM;
1914         }
1915
1916         q->prod_reg     = smmu->base + prod_off;
1917         q->cons_reg     = smmu->base + cons_off;
1918         q->ent_dwords   = dwords;
1919
1920         q->q_base  = Q_BASE_RWA;
1921         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1922         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1923                      << Q_BASE_LOG2SIZE_SHIFT;
1924
1925         q->prod = q->cons = 0;
1926         return 0;
1927 }
1928
1929 static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu,
1930                                     struct arm_smmu_queue *q)
1931 {
1932         size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3;
1933
1934         dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma);
1935 }
1936
1937 static void arm_smmu_free_queues(struct arm_smmu_device *smmu)
1938 {
1939         arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1940         arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1941
1942         if (smmu->features & ARM_SMMU_FEAT_PRI)
1943                 arm_smmu_free_one_queue(smmu, &smmu->priq.q);
1944 }
1945
1946 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1947 {
1948         int ret;
1949
1950         /* cmdq */
1951         spin_lock_init(&smmu->cmdq.lock);
1952         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1953                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1954         if (ret)
1955                 goto out;
1956
1957         /* evtq */
1958         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1959                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1960         if (ret)
1961                 goto out_free_cmdq;
1962
1963         /* priq */
1964         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
1965                 return 0;
1966
1967         ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
1968                                       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
1969         if (ret)
1970                 goto out_free_evtq;
1971
1972         return 0;
1973
1974 out_free_evtq:
1975         arm_smmu_free_one_queue(smmu, &smmu->evtq.q);
1976 out_free_cmdq:
1977         arm_smmu_free_one_queue(smmu, &smmu->cmdq.q);
1978 out:
1979         return ret;
1980 }
1981
1982 static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu)
1983 {
1984         int i;
1985         size_t size;
1986         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1987
1988         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1989         for (i = 0; i < cfg->num_l1_ents; ++i) {
1990                 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i];
1991
1992                 if (!desc->l2ptr)
1993                         continue;
1994
1995                 dma_free_coherent(smmu->dev, size, desc->l2ptr,
1996                                   desc->l2ptr_dma);
1997         }
1998 }
1999
2000 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2001 {
2002         unsigned int i;
2003         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2004         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2005         void *strtab = smmu->strtab_cfg.strtab;
2006
2007         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2008         if (!cfg->l1_desc) {
2009                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2010                 return -ENOMEM;
2011         }
2012
2013         for (i = 0; i < cfg->num_l1_ents; ++i) {
2014                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2015                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2016         }
2017
2018         return 0;
2019 }
2020
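/*
 * Size the first level of a 2-level stream table: each L1 descriptor
 * covers 2^STRTAB_SPLIT STEs, so we need roughly 2^(sid_bits - STRTAB_SPLIT)
 * descriptors, capped by the maximum L1 size the hardware allows. The L2
 * tables themselves are allocated lazily, per SID, via
 * arm_smmu_init_l2_strtab().
 */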
2021 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2022 {
2023         void *strtab;
2024         u64 reg;
2025         u32 size, l1size;
2026         int ret;
2027         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2028
2029         /*
2030          * If we can resolve everything with a single L2 table, then we
2031          * just need a single L1 descriptor. Otherwise, calculate the L1
2032          * size, capped to the SIDSIZE.
2033          */
2034         if (smmu->sid_bits < STRTAB_SPLIT) {
2035                 size = 0;
2036         } else {
2037                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2038                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2039         }
2040         cfg->num_l1_ents = 1 << size;
2041
2042         size += STRTAB_SPLIT;
2043         if (size < smmu->sid_bits)
2044                 dev_warn(smmu->dev,
2045                          "2-level strtab only covers %u/%u bits of SID\n",
2046                          size, smmu->sid_bits);
2047
2048         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2049         strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2050                                      GFP_KERNEL);
2051         if (!strtab) {
2052                 dev_err(smmu->dev,
2053                         "failed to allocate l1 stream table (%u bytes)\n",
2054                         l1size);
2055                 return -ENOMEM;
2056         }
2057         cfg->strtab = strtab;
2058
2059         /* Configure strtab_base_cfg for 2 levels */
2060         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2061         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2062                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2063         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2064                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2065         cfg->strtab_base_cfg = reg;
2066
2067         ret = arm_smmu_init_l1_strtab(smmu);
2068         if (ret)
2069                 dma_free_coherent(smmu->dev,
2070                                   l1size,
2071                                   strtab,
2072                                   cfg->strtab_dma);
2073         return ret;
2074 }
2075
2076 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2077 {
2078         void *strtab;
2079         u64 reg;
2080         u32 size;
2081         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2082
2083         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2084         strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2085                                      GFP_KERNEL);
2086         if (!strtab) {
2087                 dev_err(smmu->dev,
2088                         "failed to allocate linear stream table (%u bytes)\n",
2089                         size);
2090                 return -ENOMEM;
2091         }
2092         cfg->strtab = strtab;
2093         cfg->num_l1_ents = 1 << smmu->sid_bits;
2094
2095         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2096         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2097         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2098                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2099         cfg->strtab_base_cfg = reg;
2100
2101         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2102         return 0;
2103 }
2104
2105 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2106 {
2107         u64 reg;
2108         int ret;
2109
2110         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2111                 ret = arm_smmu_init_strtab_2lvl(smmu);
2112         else
2113                 ret = arm_smmu_init_strtab_linear(smmu);
2114
2115         if (ret)
2116                 return ret;
2117
2118         /* Set the strtab base address */
2119         reg  = smmu->strtab_cfg.strtab_dma &
2120                STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2121         reg |= STRTAB_BASE_RA;
2122         smmu->strtab_cfg.strtab_base = reg;
2123
2124         /* Allocate the first VMID for stage-2 bypass STEs */
2125         set_bit(0, smmu->vmid_map);
2126         return 0;
2127 }
2128
2129 static void arm_smmu_free_strtab(struct arm_smmu_device *smmu)
2130 {
2131         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2132         u32 size = cfg->num_l1_ents;
2133
2134         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2135                 arm_smmu_free_l2_strtab(smmu);
2136                 size *= STRTAB_L1_DESC_DWORDS << 3;
2137         } else {
2138                 size *= STRTAB_STE_DWORDS << 3;
2139         }
2140
2141         dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma);
2142 }
2143
2144 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2145 {
2146         int ret;
2147
2148         ret = arm_smmu_init_queues(smmu);
2149         if (ret)
2150                 return ret;
2151
2152         ret = arm_smmu_init_strtab(smmu);
2153         if (ret)
2154                 goto out_free_queues;
2155
2156         return 0;
2157
2158 out_free_queues:
2159         arm_smmu_free_queues(smmu);
2160         return ret;
2161 }
2162
2163 static void arm_smmu_free_structures(struct arm_smmu_device *smmu)
2164 {
2165         arm_smmu_free_strtab(smmu);
2166         arm_smmu_free_queues(smmu);
2167 }
2168
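/*
 * Update a register that is acknowledged by the hardware (e.g. CR0 or
 * IRQ_CTRL): write the value, then poll the corresponding ACK register
 * until it reads back the same value or we time out.
 */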
2169 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2170                                    unsigned int reg_off, unsigned int ack_off)
2171 {
2172         u32 reg;
2173
2174         writel_relaxed(val, smmu->base + reg_off);
2175         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2176                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2177 }
2178
2179 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2180 {
2181         int ret, irq;
2182         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2183
2184         /* Disable IRQs first */
2185         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2186                                       ARM_SMMU_IRQ_CTRLACK);
2187         if (ret) {
2188                 dev_err(smmu->dev, "failed to disable irqs\n");
2189                 return ret;
2190         }
2191
2192         /* Clear the MSI address regs */
2193         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2194         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2195
2196         /* Request wired interrupt lines */
2197         irq = smmu->evtq.q.irq;
2198         if (irq) {
2199                 ret = devm_request_threaded_irq(smmu->dev, irq,
2200                                                 arm_smmu_evtq_handler,
2201                                                 arm_smmu_evtq_thread,
2202                                                 0, "arm-smmu-v3-evtq", smmu);
2203                 if (IS_ERR_VALUE(ret))
2204                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2205         }
2206
2207         irq = smmu->cmdq.q.irq;
2208         if (irq) {
2209                 ret = devm_request_irq(smmu->dev, irq,
2210                                        arm_smmu_cmdq_sync_handler, 0,
2211                                        "arm-smmu-v3-cmdq-sync", smmu);
2212                 if (IS_ERR_VALUE(ret))
2213                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2214         }
2215
2216         irq = smmu->gerr_irq;
2217         if (irq) {
2218                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2219                                        0, "arm-smmu-v3-gerror", smmu);
2220                 if (IS_ERR_VALUE(ret))
2221                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2222         }
2223
2224         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2225                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2226
2227                 irq = smmu->priq.q.irq;
2228                 if (irq) {
2229                         ret = devm_request_threaded_irq(smmu->dev, irq,
2230                                                         arm_smmu_priq_handler,
2231                                                         arm_smmu_priq_thread,
2232                                                         0, "arm-smmu-v3-priq",
2233                                                         smmu);
2234                         if (IS_ERR_VALUE(ret))
2235                                 dev_warn(smmu->dev,
2236                                          "failed to enable priq irq\n");
2237                         else
2238                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2239                 }
2240         }
2241
2242         /* Enable interrupt generation on the SMMU */
2243         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2244                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2245         if (ret)
2246                 dev_warn(smmu->dev, "failed to enable irqs\n");
2247
2248         return 0;
2249 }
2250
2251 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2252 {
2253         int ret;
2254
2255         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2256         if (ret)
2257                 dev_err(smmu->dev, "failed to clear cr0\n");
2258
2259         return ret;
2260 }
2261
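/*
 * Full device reset: disable the SMMU, programme the table/queue memory
 * attributes and the stream table base, then bring up the command queue
 * first so that configuration and TLB invalidation commands can be issued
 * before the event/PRI queues, interrupts and finally the SMMU itself are
 * enabled.
 */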
2262 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2263 {
2264         int ret;
2265         u32 reg, enables;
2266         struct arm_smmu_cmdq_ent cmd;
2267
2268         /* Clear CR0 and sync (disables SMMU and queue processing) */
2269         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2270         if (reg & CR0_SMMUEN)
2271                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2272
2273         ret = arm_smmu_device_disable(smmu);
2274         if (ret)
2275                 return ret;
2276
2277         /* CR1 (table and queue memory attributes) */
2278         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2279               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2280               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2281               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2282               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2283               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2284         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2285
2286         /* CR2 (random crap) */
2287         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2288         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2289
2290         /* Stream table */
2291         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2292                        smmu->base + ARM_SMMU_STRTAB_BASE);
2293         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2294                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2295
2296         /* Command queue */
2297         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2298         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2299         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2300
2301         enables = CR0_CMDQEN;
2302         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2303                                       ARM_SMMU_CR0ACK);
2304         if (ret) {
2305                 dev_err(smmu->dev, "failed to enable command queue\n");
2306                 return ret;
2307         }
2308
2309         /* Invalidate any cached configuration */
2310         cmd.opcode = CMDQ_OP_CFGI_ALL;
2311         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2312         cmd.opcode = CMDQ_OP_CMD_SYNC;
2313         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2314
2315         /* Invalidate any stale TLB entries */
2316         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2317                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2318                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2319         }
2320
2321         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2322         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2323         cmd.opcode = CMDQ_OP_CMD_SYNC;
2324         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2325
2326         /* Event queue */
2327         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2328         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2329         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2330
2331         enables |= CR0_EVTQEN;
2332         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2333                                       ARM_SMMU_CR0ACK);
2334         if (ret) {
2335                 dev_err(smmu->dev, "failed to enable event queue\n");
2336                 return ret;
2337         }
2338
2339         /* PRI queue */
2340         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2341                 writeq_relaxed(smmu->priq.q.q_base,
2342                                smmu->base + ARM_SMMU_PRIQ_BASE);
2343                 writel_relaxed(smmu->priq.q.prod,
2344                                smmu->base + ARM_SMMU_PRIQ_PROD);
2345                 writel_relaxed(smmu->priq.q.cons,
2346                                smmu->base + ARM_SMMU_PRIQ_CONS);
2347
2348                 enables |= CR0_PRIQEN;
2349                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2350                                               ARM_SMMU_CR0ACK);
2351                 if (ret) {
2352                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2353                         return ret;
2354                 }
2355         }
2356
2357         ret = arm_smmu_setup_irqs(smmu);
2358         if (ret) {
2359                 dev_err(smmu->dev, "failed to setup irqs\n");
2360                 return ret;
2361         }
2362
2363         /* Enable the SMMU interface */
2364         enables |= CR0_SMMUEN;
2365         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2366                                       ARM_SMMU_CR0ACK);
2367         if (ret) {
2368                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2369                 return ret;
2370         }
2371
2372         return 0;
2373 }
2374
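/*
 * Probe the ID registers: IDR0 for translation stages, features and
 * ASID/VMID sizes, IDR1 for queue and (Sub)StreamID sizes, and IDR5 for
 * the supported page sizes and the output address size.
 */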
2375 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2376 {
2377         u32 reg;
2378         bool coherent;
2379         unsigned long pgsize_bitmap = 0;
2380
2381         /* IDR0 */
2382         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2383
2384         /* 2-level structures */
2385         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2386                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2387
2388         if (reg & IDR0_CD2L)
2389                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2390
2391         /*
2392          * Translation table endianness.
2393          * We currently require the same endianness as the CPU, but this
2394          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2395          */
2396         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2397         case IDR0_TTENDIAN_MIXED:
2398                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2399                 break;
2400 #ifdef __BIG_ENDIAN
2401         case IDR0_TTENDIAN_BE:
2402                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2403                 break;
2404 #else
2405         case IDR0_TTENDIAN_LE:
2406                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2407                 break;
2408 #endif
2409         default:
2410                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2411                 return -ENXIO;
2412         }
2413
2414         /* Boolean feature flags */
2415         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2416                 smmu->features |= ARM_SMMU_FEAT_PRI;
2417
2418         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2419                 smmu->features |= ARM_SMMU_FEAT_ATS;
2420
2421         if (reg & IDR0_SEV)
2422                 smmu->features |= ARM_SMMU_FEAT_SEV;
2423
2424         if (reg & IDR0_MSI)
2425                 smmu->features |= ARM_SMMU_FEAT_MSI;
2426
2427         if (reg & IDR0_HYP)
2428                 smmu->features |= ARM_SMMU_FEAT_HYP;
2429
2430         /*
2431          * The dma-coherent property is used in preference to the ID
2432          * register, but warn on mismatch.
2433          */
2434         coherent = of_dma_is_coherent(smmu->dev->of_node);
2435         if (coherent)
2436                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2437
2438         if (!!(reg & IDR0_COHACC) != coherent)
2439                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2440                          coherent ? "true" : "false");
2441
2442         if (reg & IDR0_STALL_MODEL)
2443                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2444
2445         if (reg & IDR0_S1P)
2446                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2447
2448         if (reg & IDR0_S2P)
2449                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2450
2451         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2452                 dev_err(smmu->dev, "no translation support!\n");
2453                 return -ENXIO;
2454         }
2455
2456         /* We only support the AArch64 table format at present */
2457         if ((reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) < IDR0_TTF_AARCH64) {
2458                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2459                 return -ENXIO;
2460         }
2461
2462         /* ASID/VMID sizes */
2463         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2464         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2465
2466         /* IDR1 */
2467         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2468         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2469                 dev_err(smmu->dev, "embedded implementation not supported\n");
2470                 return -ENXIO;
2471         }
2472
2473         /* Queue sizes, capped at 4k */
2474         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2475                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2476         if (!smmu->cmdq.q.max_n_shift) {
2477                 /* Odd alignment restrictions on the base, so ignore for now */
2478                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2479                 return -ENXIO;
2480         }
2481
2482         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2483                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2484         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2485                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2486
2487         /* SID/SSID sizes */
2488         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2489         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2490
2491         /* IDR5 */
2492         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2493
2494         /* Maximum number of outstanding stalls */
2495         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2496                                 & IDR5_STALL_MAX_MASK;
2497
2498         /* Page sizes */
2499         if (reg & IDR5_GRAN64K)
2500                 pgsize_bitmap |= SZ_64K | SZ_512M;
2501         if (reg & IDR5_GRAN16K)
2502                 pgsize_bitmap |= SZ_16K | SZ_32M;
2503         if (reg & IDR5_GRAN4K)
2504                 pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2505
2506         arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2507
2508         /* Output address size */
2509         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2510         case IDR5_OAS_32_BIT:
2511                 smmu->oas = 32;
2512                 break;
2513         case IDR5_OAS_36_BIT:
2514                 smmu->oas = 36;
2515                 break;
2516         case IDR5_OAS_40_BIT:
2517                 smmu->oas = 40;
2518                 break;
2519         case IDR5_OAS_42_BIT:
2520                 smmu->oas = 42;
2521                 break;
2522         case IDR5_OAS_44_BIT:
2523                 smmu->oas = 44;
2524                 break;
2525         default:
2526                 dev_info(smmu->dev,
2527                         "unknown output address size. Truncating to 48-bit\n");
2528                 /* Fallthrough */
2529         case IDR5_OAS_48_BIT:
2530                 smmu->oas = 48;
2531         }
2532
2533         /* Set the DMA mask for our table walker */
2534         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2535                 dev_warn(smmu->dev,
2536                          "failed to set DMA mask for table walker\n");
2537
2538         if (!smmu->ias)
2539                 smmu->ias = smmu->oas;
2540
2541         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2542                  smmu->ias, smmu->oas, smmu->features);
2543         return 0;
2544 }
2545
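/*
 * Platform probe: map the MMIO region, pick up the wired interrupt lines
 * from DT, discover the hardware features, allocate the in-memory queues
 * and stream table, and finally reset and enable the device.
 */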
2546 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2547 {
2548         int irq, ret;
2549         struct resource *res;
2550         struct arm_smmu_device *smmu;
2551         struct device *dev = &pdev->dev;
2552
2553         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2554         if (!smmu) {
2555                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2556                 return -ENOMEM;
2557         }
2558         smmu->dev = dev;
2559
2560         /* Base address */
2561         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2562         if (resource_size(res) + 1 < SZ_128K) {
2563                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2564                 return -EINVAL;
2565         }
2566
2567         smmu->base = devm_ioremap_resource(dev, res);
2568         if (IS_ERR(smmu->base))
2569                 return PTR_ERR(smmu->base);
2570
2571         /* Interrupt lines */
2572         irq = platform_get_irq_byname(pdev, "eventq");
2573         if (irq > 0)
2574                 smmu->evtq.q.irq = irq;
2575
2576         irq = platform_get_irq_byname(pdev, "priq");
2577         if (irq > 0)
2578                 smmu->priq.q.irq = irq;
2579
2580         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2581         if (irq > 0)
2582                 smmu->cmdq.q.irq = irq;
2583
2584         irq = platform_get_irq_byname(pdev, "gerror");
2585         if (irq > 0)
2586                 smmu->gerr_irq = irq;
2587
2588         parse_driver_options(smmu);
2589
2590         /* Probe the h/w */
2591         ret = arm_smmu_device_probe(smmu);
2592         if (ret)
2593                 return ret;
2594
2595         /* Initialise in-memory data structures */
2596         ret = arm_smmu_init_structures(smmu);
2597         if (ret)
2598                 return ret;
2599
2600         /* Reset the device */
2601         ret = arm_smmu_device_reset(smmu);
2602         if (ret)
2603                 goto out_free_structures;
2604
2605         /* Record our private device structure */
2606         platform_set_drvdata(pdev, smmu);
2607         return 0;
2608
2609 out_free_structures:
2610         arm_smmu_free_structures(smmu);
2611         return ret;
2612 }
2613
2614 static int arm_smmu_device_remove(struct platform_device *pdev)
2615 {
2616         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2617
2618         arm_smmu_device_disable(smmu);
2619         arm_smmu_free_structures(smmu);
2620         return 0;
2621 }
2622
2623 static struct of_device_id arm_smmu_of_match[] = {
2624         { .compatible = "arm,smmu-v3", },
2625         { },
2626 };
2627 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2628
2629 static struct platform_driver arm_smmu_driver = {
2630         .driver = {
2631                 .name           = "arm-smmu-v3",
2632                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2633         },
2634         .probe  = arm_smmu_device_dt_probe,
2635         .remove = arm_smmu_device_remove,
2636 };
2637
2638 static int __init arm_smmu_init(void)
2639 {
2640         struct device_node *np;
2641         int ret;
2642
2643         np = of_find_matching_node(NULL, arm_smmu_of_match);
2644         if (!np)
2645                 return 0;
2646
2647         of_node_put(np);
2648
2649         ret = platform_driver_register(&arm_smmu_driver);
2650         if (ret)
2651                 return ret;
2652
2653         return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2654 }
2655
2656 static void __exit arm_smmu_exit(void)
2657 {
2658         return platform_driver_unregister(&arm_smmu_driver);
2659 }
2660
2661 subsys_initcall(arm_smmu_init);
2662 module_exit(arm_smmu_exit);
2663
2664 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2665 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2666 MODULE_LICENSE("GPL v2");